提交 4a493ead 编写于 作者: L liuruilong

format files

上级 d4c0395f
......@@ -17,8 +17,8 @@ limitations under the License. */
#include "operators/conv_op.h"
#include <vector>
#include "framework/op_proto_maker.h"
#include "operators/math/conv_func.h"
#include "framework/op_registry.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
......@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
......
......@@ -17,9 +17,9 @@ limitations under the License. */
#include "operators/depthwise_conv_op.h"
#include <vector>
#include "framework/op_proto_maker.h"
#include "operators/math/conv_func.h"
#include "framework/op_registry.h"
#include "operators/conv_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
......@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/math/conv_func.h"
#include "operators/fusion_conv_add.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
......@@ -36,9 +36,9 @@ void FushionConvAddOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
......
......@@ -18,10 +18,10 @@ limitations under the License. */
#include <string>
#include <vector>
#include "op_param.h"
#include "framework/operator.h"
#include "operators/kernel/conv_add_kernel.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_kernel.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -36,16 +36,16 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
}
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
......
......@@ -17,9 +17,9 @@ limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
#include "operators/kernel/conv_add_relu_kernel.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -42,27 +42,30 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
};
template <typename DeviceType, typename T>
class FusionConvAddReluOp: public framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>> {
class FusionConvAddReluOp : public framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>> {
public:
FusionConvAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel;
DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
//static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new FushionConvAddReluOpMatcher());
// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
// FushionConvAddReluOpMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
......
......@@ -70,7 +70,7 @@ class FushionFcOp : public framework::OperatorWithKernel<
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
//static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -50,7 +50,8 @@ void ConvAddKernel<CPU, float>::Compute(
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
......
......@@ -21,7 +21,7 @@ namespace operators {
template <>
void ConvAddReluKernel<CPU, float>::Compute(
const FushionConvAddReluParam &param) const {
const FushionConvAddReluParam &param) const {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
......@@ -49,9 +49,10 @@ void ConvAddReluKernel<CPU, float>::Compute(
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
......@@ -61,14 +62,14 @@ void ConvAddReluKernel<CPU, float>::Compute(
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
......@@ -105,13 +106,12 @@ void ConvAddReluKernel<CPU, float>::Compute(
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(1), true);
}
}
}
template class ConvAddReluKernel<CPU, float>;
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -21,12 +21,12 @@ limitations under the License. */
#include <arm_neon.h>
#endif
#include "framework/ddim.h"
#include "operators/op_param.h"
#include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/vol2col.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -34,7 +34,6 @@ namespace operators {
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddKernel : public OpKernelBase<DeviceType, FushionConvAddParam> {
public:
......
......@@ -19,11 +19,11 @@ limitations under the License. */
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/op_param.h"
#include "operators/math/im2col.h"
#include "operators/math/vol2col.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -32,7 +32,8 @@ using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddReluKernel : public OpKernelBase<DeviceType, FushionConvAddReluParam> {
class ConvAddReluKernel
: public OpKernelBase<DeviceType, FushionConvAddReluParam> {
public:
void Compute(const FushionConvAddReluParam &param) const;
};
......
......@@ -22,8 +22,8 @@ namespace paddle_mobile {
namespace operators {
template <>
void BatchNormKernel<GPU_MALI, float>::Compute(const BatchNormParam &param) const {
}
void BatchNormKernel<GPU_MALI, float>::Compute(
const BatchNormParam &param) const {}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -42,7 +42,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
"the bias tensor's dims size != 1")
DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1);
DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
int outer_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim);
bias.Resize(dDim);
......@@ -98,6 +98,6 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
return !(filter_1 && strides_1 && padding_0 && dilation_1);
}
}
}
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -177,8 +177,8 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
// Blocked (tiled) matrix multiplication
void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc,
int first_time, bool relu = false) {
const float *B, int ldb, float beta, float *C, int ldc,
int first_time, bool relu = false) {
int Buff_A_M = m;
int Buff_B_N = n;
......@@ -210,12 +210,11 @@ void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
for (i = 0; i < Buff_A_M; i += MR) {
mc = (m - i) < MR ? _mc : MR;
AddDot4x4_relu(k, alpha, &packedA[i * k], 4, &packedB[j * k], k, beta,
&C(i, j), ldc, mc, nc, relu);
&C(i, j), ldc, mc, nc, relu);
}
}
}
// Compute a smaller 4 * 4 sub-block of matrix C
#if defined(IOS)
void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
......@@ -269,8 +268,9 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
}
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) {
// init C
int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu = false) {
// init C
float32x4_t cv0 = vdupq_n_f32(0.0);
float32x4_t cv1 = vdupq_n_f32(0.0);
float32x4_t cv2 = vdupq_n_f32(0.0);
......@@ -458,7 +458,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
}
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) {
int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu = false) {
int kc1 = k / 2, kc2 = k % 2;
int bytes_ldc = 4 * ldc;
int flag_alpha = (alpha == 1.0) ? 1 : 2;
......@@ -571,8 +572,8 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
[kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha),
[beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc),
[flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta)
: "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13", "q14");
: "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13",
"q14");
if (mc != MR || nc != NR) {
int i, j;
......@@ -599,7 +600,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
C(i, j) = 0;
}
}
}
}
}
......@@ -664,7 +664,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
}
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu) {
int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu) {
float c[16] = {0};
float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3;
......@@ -725,7 +726,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
}
}
#endif
// 32-bit float matrix multiplication
......@@ -768,11 +768,11 @@ void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
}
if (p + KC >= k) {
InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_,
&C(i, j), ldc, i == 0, true);
InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb,
beta_, &C(i, j), ldc, i == 0, true);
} else {
InnerKernel(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_,
&C(i, j), ldc, i == 0);
&C(i, j), ldc, i == 0);
}
}
}
......
......@@ -58,14 +58,15 @@ void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B,
int ldb, float beta, float *C, int ldc, int mc, int nc);
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu);
int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu);
// 32-bit float matrix multiplication
void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc);
void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc);
const float *B, int ldb, float beta, float *C, int ldc);
// 64-bit double matrix multiplication
void dgemm(int m, int n, int k, float alpha, const double *A, int lda,
......
......@@ -42,19 +42,19 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
int K = (trans_a == false) ? dim_a[1] : dim_a[0];
if (relu) {
sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
beta, matrix_out->data<float>(), N);
sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K,
matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N);
} else {
sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
beta, matrix_out->data<float>(), N);
}
}
template <>
void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b,
double alpha, framework::Tensor *matrix_out, double beta, bool relu) {
double alpha, framework::Tensor *matrix_out, double beta,
bool relu) {
auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims();
......@@ -74,8 +74,6 @@ void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
int K = (trans_a == false) ? dim_a[1] : dim_a[0];
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -839,12 +839,12 @@ Print &operator<<(Print &printer, const FushionConvAddParam &conv_param);
#endif
#ifdef FUSION_CONVADD_RELU_OP
class FushionConvAddReluParam: public FushionConvAddParam {
class FushionConvAddReluParam : public FushionConvAddParam {
public:
FushionConvAddReluParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope): FushionConvAddParam(inputs, outputs, attrs, scope) {
}
const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope)
: FushionConvAddParam(inputs, outputs, attrs, scope) {}
};
#endif
......
......@@ -42,7 +42,8 @@ using std::vector;
template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> {
public:
Executor4Test(Program<DeviceType> p, string op_type, bool use_optimize = false)
Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false)
: Executor<DeviceType>() {
this->use_optimize_ = use_optimize;
this->program_ = p;
......@@ -62,16 +63,14 @@ class Executor4Test : public Executor<DeviceType> {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (std::shared_ptr<OpDesc> op : ops) {
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test only the first op in the program whose type matches op_type
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr = paddle_mobile::framework::OpRegistry<
DeviceType>::CreateOp(op->Type(), op->GetInputs(),
op->GetOutputs(),
op->GetAttrMap(),
this->program_.scope);
op_ptr =
paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break;
}
......
......@@ -20,9 +20,10 @@ int main() {
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto program = loader.Load(g_mobilenet_ssd, false, false);
// auto program = loader.Load(g_googlenet_combine + "/model", g_googlenet_combine +
// "/params", true);
// auto program = loader.Load(g_googlenet_combine + "/model",
// g_googlenet_combine +
// "/params", true);
// program.originProgram->Description("program desc: ");
return 0;
}
......@@ -23,9 +23,10 @@ int main() {
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>>
executor(program, "fusion_conv_add_relu", true);
Executor4Test<
paddle_mobile::CPU,
paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>>
executor(program, "fusion_conv_add_relu", true);
paddle_mobile::framework::Tensor input;
GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
......
......@@ -23,8 +23,8 @@ int main() {
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::GPU_MALI,
paddle_mobile::operators::ConvOp<paddle_mobile::GPU_MALI, float>>
Executor4Test<paddle_mobile::GPU_MALI, paddle_mobile::operators::ConvOp<
paddle_mobile::GPU_MALI, float>>
executor(program, "conv2d");
paddle_mobile::framework::Tensor input;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册