Commit 4a493ead authored by liuruilong

format files

Parent d4c0395f
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "operators/conv_op.h"
 #include <vector>
 #include "framework/op_proto_maker.h"
-#include "operators/math/conv_func.h"
 #include "framework/op_registry.h"
+#include "operators/math/conv_func.h"
 namespace paddle_mobile {
 namespace operators {
@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
   for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                                dilations[i], paddings[i],
-                                                strides[i]));
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
   }
   framework::DDim ddim = framework::make_ddim(output_shape);
......
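For context on these InferShape hunks: the per-axis output length that `math::ConvOutputSize` returns is conventionally the textbook convolution formula. A minimal sketch, assuming the standard definition — the real helper in `operators/math/conv_func.h` may differ in validation details:

```cpp
// Hedged sketch of the conventional output-size computation; not the
// verbatim paddle-mobile implementation.
inline int ConvOutputSizeSketch(int input, int filter, int dilation,
                                int padding, int stride) {
  const int dkernel = dilation * (filter - 1) + 1;  // dilated filter extent
  return (input + 2 * padding - dkernel) / stride + 1;
}
```

For example, a 224-wide input with a 3-wide filter, padding 1, stride 2, dilation 1 gives (224 + 2 - 3) / 2 + 1 = 112.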
@@ -17,9 +17,9 @@ limitations under the License. */
 #include "operators/depthwise_conv_op.h"
 #include <vector>
 #include "framework/op_proto_maker.h"
-#include "operators/math/conv_func.h"
 #include "framework/op_registry.h"
 #include "operators/conv_op.h"
+#include "operators/math/conv_func.h"
 namespace paddle_mobile {
 namespace operators {
@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
   for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                                dilations[i], paddings[i],
-                                                strides[i]));
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
   }
   framework::DDim ddim = framework::make_ddim(output_shape);
......
@@ -14,8 +14,8 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_OP
-#include "operators/math/conv_func.h"
 #include "operators/fusion_conv_add.h"
+#include "operators/math/conv_func.h"
 namespace paddle_mobile {
 namespace operators {
@@ -36,9 +36,9 @@ void FushionConvAddOp<Dtype, T>::InferShape() const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
   for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                                dilations[i], paddings[i],
-                                                strides[i]));
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
   }
   framework::DDim ddim = framework::make_ddim(output_shape);
......
@@ -18,10 +18,10 @@ limitations under the License. */
 #include <string>
 #include <vector>
-#include "op_param.h"
 #include "framework/operator.h"
-#include "operators/kernel/conv_add_kernel.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
+#include "op_param.h"
+#include "operators/kernel/conv_add_kernel.h"
 namespace paddle_mobile {
 namespace operators {
......
@@ -36,16 +36,16 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
   for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                                dilations[i], paddings[i],
-                                                strides[i]));
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
   }
   framework::DDim ddim = framework::make_ddim(output_shape);
   this->param_.Output()->Resize(ddim);
 }
-}
-}
+}  // namespace operators
+}  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
......
@@ -17,9 +17,9 @@ limitations under the License. */
 #pragma once
 #include "framework/operator.h"
-#include "operators/op_param.h"
-#include "operators/kernel/conv_add_relu_kernel.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/conv_add_relu_kernel.h"
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -42,7 +42,7 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
 };
 template <typename DeviceType, typename T>
-class FusionConvAddReluOp: public framework::OperatorWithKernel<
+class FusionConvAddReluOp : public framework::OperatorWithKernel<
                                DeviceType, FushionConvAddReluParam,
                                operators::ConvAddReluKernel<DeviceType, T>> {
  public:
@@ -50,19 +50,22 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
                       std::shared_ptr<framework::Scope> scope)
-      : framework::OperatorWithKernel<DeviceType, FushionConvAddReluParam,
-                                      operators::ConvAddReluKernel<DeviceType, T>>(
-            type, inputs, outputs, attrs, scope) {}
+      : framework::OperatorWithKernel<
+            DeviceType, FushionConvAddReluParam,
+            operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
+                                                         attrs, scope) {}
   using framework::OperatorWithKernel<
       DeviceType, FushionConvAddReluParam,
       operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel;
   void InferShape() const override;
  protected:
 };
 #ifdef PADDLE_MOBILE_CPU
-//static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new FushionConvAddReluOpMatcher());
+// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
+// FushionConvAddReluOpMatcher());
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
......
@@ -70,7 +70,7 @@ class FushionFcOp : public framework::OperatorWithKernel<
 static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-//static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
......
@@ -50,7 +50,8 @@ void ConvAddKernel<CPU, float>::Compute(
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);
-  bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations);
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
   Tensor col_matrix;
   if (is_expand) {
......
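The `is_expand` flag decides whether the kernel must materialize an im2col buffer at all: a 1x1 filter with unit stride, zero padding, and unit dilation can feed the GEMM directly. Below is a minimal single-channel sketch of the im2col + GEMM pipeline these Compute functions follow; the real code operates on `Tensor`s via `math::Im2ColFunctor` and `math::matmul`, so names and layout here are illustrative assumptions:

```cpp
#include <vector>
// Hedged sketch: valid-padding, stride-1, dilation-1, single-channel conv2d.
std::vector<float> conv2d_naive(const std::vector<float> &input, int H, int W,
                                const std::vector<float> &kernel, int K) {
  const int OH = H - K + 1, OW = W - K + 1;
  // im2col: each output pixel becomes one column holding its K*K patch.
  std::vector<float> col(K * K * OH * OW);
  for (int kh = 0; kh < K; ++kh)
    for (int kw = 0; kw < K; ++kw)
      for (int oh = 0; oh < OH; ++oh)
        for (int ow = 0; ow < OW; ++ow)
          col[((kh * K + kw) * OH + oh) * OW + ow] =
              input[(oh + kh) * W + (ow + kw)];
  // GEMM: filter row vector (1 x K*K) times col matrix (K*K x OH*OW).
  std::vector<float> out(OH * OW, 0.f);
  for (int p = 0; p < K * K; ++p)
    for (int q = 0; q < OH * OW; ++q)
      out[q] += kernel[p] * col[p * OH * OW + q];
  return out;
}
```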
@@ -51,7 +51,8 @@ void ConvAddReluKernel<CPU, float>::Compute(
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);
-  bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations);
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
   Tensor col_matrix;
   if (is_expand) {
@@ -105,13 +106,12 @@ void ConvAddReluKernel<CPU, float>::Compute(
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
                           static_cast<float>(1), true);
     }
   }
 }
 template class ConvAddReluKernel<CPU, float>;
-}
-}
+}  // namespace operators
+}  // namespace paddle_mobile
 #endif
@@ -21,12 +21,12 @@ limitations under the License. */
 #include <arm_neon.h>
 #endif
 #include "framework/ddim.h"
-#include "operators/op_param.h"
 #include "framework/operator.h"
-#include "operators/math/im2col.h"
-#include "operators/math/vol2col.h"
 #include "operators/math/conv_func.h"
+#include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -34,7 +34,6 @@ namespace operators {
 using framework::DDim;
 using framework::OpKernelBase;
 template <typename DeviceType, typename T>
 class ConvAddKernel : public OpKernelBase<DeviceType, FushionConvAddParam> {
  public:
......
@@ -19,11 +19,11 @@ limitations under the License. */
 #include <vector>
 #include "framework/ddim.h"
 #include "framework/operator.h"
-#include "operators/op_param.h"
-#include "operators/math/im2col.h"
-#include "operators/math/vol2col.h"
 #include "operators/math/conv_func.h"
+#include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -32,7 +32,8 @@ using framework::DDim;
 using framework::OpKernelBase;
 template <typename DeviceType, typename T>
-class ConvAddReluKernel : public OpKernelBase<DeviceType, FushionConvAddReluParam> {
+class ConvAddReluKernel
+    : public OpKernelBase<DeviceType, FushionConvAddReluParam> {
  public:
   void Compute(const FushionConvAddReluParam &param) const;
 };
......
@@ -22,8 +22,8 @@ namespace paddle_mobile {
 namespace operators {
 template <>
-void BatchNormKernel<GPU_MALI, float>::Compute(const BatchNormParam &param) const {
-}
+void BatchNormKernel<GPU_MALI, float>::Compute(
+    const BatchNormParam &param) const {}
 }  // namespace operators
 }  // namespace paddle_mobile
......
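The MALI batch-norm kernel above is still an empty stub. For reference, the inference-time transform such a `Compute` would apply per element is the usual normalize-scale-shift; a hedged sketch of the standard formula, not the eventual GPU implementation:

```cpp
#include <cmath>
// y = scale * (x - mean) / sqrt(var + epsilon) + bias, applied elementwise
// per channel; epsilon guards against division by zero.
inline float batch_norm_infer(float x, float mean, float var, float scale,
                              float bias, float epsilon = 1e-5f) {
  return scale * (x - mean) / std::sqrt(var + epsilon) + bias;
}
```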
@@ -98,6 +98,6 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
   return !(filter_1 && strides_1 && padding_0 && dilation_1);
 }
-}
-}
-}
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle_mobile
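The return statement above is the tail of the expansion predicate. Reconstructed in full it plausibly reads as below — a sketch inferred from the visible signature and return expression, not a verbatim copy:

```cpp
#include <cstdint>
#include <vector>
// im2col/vol2col expansion is needed unless the filter is 1x1 with stride 1,
// padding 0, and dilation 1, in which case the input can feed GEMM as-is.
inline bool IsExpandSketch(const std::vector<int64_t> &filter_dim,
                           const std::vector<int> &strides,
                           const std::vector<int> &paddings,
                           const std::vector<int> &dilations) {
  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
  for (size_t j = 0; j < strides.size(); ++j) {
    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
    strides_1 = strides_1 && (strides[j] == 1);
    padding_0 = padding_0 && (paddings[j] == 0);
    dilation_1 = dilation_1 && (dilations[j] == 1);
  }
  return !(filter_1 && strides_1 && padding_0 && dilation_1);
}
```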
@@ -215,7 +215,6 @@ void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
   }
 }
 // Compute a smaller 4 * 4 block of the C matrix
 #if defined(IOS)
 void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
@@ -269,7 +268,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
 }
 void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
-                    int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) {
+                    int ldb, float beta, float *C, int ldc, int mc, int nc,
+                    bool relu = false) {
   // init C
   float32x4_t cv0 = vdupq_n_f32(0.0);
   float32x4_t cv1 = vdupq_n_f32(0.0);
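The `float32x4_t` / `vdupq_n_f32` lines are the NEON register accumulators of the 4x4 micro-kernel: each q-register holds one row of the C tile. A minimal sketch of the underlying idiom, assuming a NEON-capable target and packed B panels (4 consecutive floats per k-step); the real kernels interleave loads and handle alpha/beta besides:

```cpp
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
// One 4-wide row accumulator: c[0..3] = sum over p of a[p] * b[4p..4p+3].
void row_axpy4(int k, const float *a, const float *b, float *c) {
  float32x4_t cv = vdupq_n_f32(0.f);        // zero the accumulator register
  for (int p = 0; p < k; ++p) {
    float32x4_t bv = vld1q_f32(b + 4 * p);  // 4 packed B values for step p
    cv = vmlaq_n_f32(cv, bv, a[p]);         // fused cv += bv * a[p]
  }
  vst1q_f32(c, cv);
}
#endif
```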
@@ -458,7 +458,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
 }
 void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
-                    int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) {
+                    int ldb, float beta, float *C, int ldc, int mc, int nc,
+                    bool relu = false) {
   int kc1 = k / 2, kc2 = k % 2;
   int bytes_ldc = 4 * ldc;
   int flag_alpha = (alpha == 1.0) ? 1 : 2;
@@ -571,8 +572,8 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
       [kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha),
       [beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc),
       [flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta)
-    : "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13", "q14");
+    : "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13",
+      "q14");
   if (mc != MR || nc != NR) {
     int i, j;
@@ -599,7 +600,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
         C(i, j) = 0;
       }
     }
   }
 }
 }
@@ -664,7 +664,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
 }
 void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
-                    int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu) {
+                    int ldb, float beta, float *C, int ldc, int mc, int nc,
+                    bool relu) {
   float c[16] = {0};
   float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3;
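For the non-assembly path, the `float c[16]` accumulator above suggests the 4x4 tile is computed in locals and written back at the end. A portable sketch of the micro-kernel contract — the packed a/b layout (4 values per k-step) and omitted alpha/beta handling are assumptions here, and mc, nc ≤ 4 mark partial edge tiles:

```cpp
#include <algorithm>
// C(mc x nc corner of a 4x4 tile) += A_panel * B_panel, optional fused ReLU.
void AddDot4x4Sketch(int k, const float *a, const float *b, float *C, int ldc,
                     int mc, int nc, bool relu) {
  float c[16] = {0};  // accumulate the full tile in locals first
  for (int p = 0; p < k; ++p)
    for (int i = 0; i < 4; ++i)
      for (int j = 0; j < 4; ++j)
        c[i * 4 + j] += a[4 * p + i] * b[4 * p + j];
  // write back only the valid mc x nc corner of an edge tile
  for (int i = 0; i < mc; ++i)
    for (int j = 0; j < nc; ++j) {
      float v = C[i * ldc + j] + c[i * 4 + j];
      C[i * ldc + j] = relu ? std::max(v, 0.f) : v;
    }
}
```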
@@ -725,7 +726,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
   }
 }
 #endif
 // 32-bit float matrix multiplication
@@ -768,8 +768,8 @@ void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
       }
       if (p + KC >= k) {
-        InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_,
-                         &C(i, j), ldc, i == 0, true);
+        InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb,
+                         beta_, &C(i, j), ldc, i == 0, true);
       } else {
         InnerKernel(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_,
                     &C(i, j), ldc, i == 0);
......
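Note how `sgemm_relu` dispatches the `_relu` inner kernel only when `p + KC >= k`, i.e. on the last k-panel, so each output element is clamped exactly once, after its final accumulation. A self-contained, unpacked sketch of that blocking discipline; the block sizes, the `i == 0` beta handling, and panel packing of the real code are simplified away:

```cpp
#include <algorithm>
constexpr int MC_BLK = 128, KC_BLK = 256;  // illustrative block sizes
void sgemm_relu_sketch(int m, int n, int k, const float *A, int lda,
                       const float *B, int ldb, float *C, int ldc) {
  for (int i = 0; i < m; i += MC_BLK) {
    const int mc = std::min(m - i, MC_BLK);
    for (int p = 0; p < k; p += KC_BLK) {
      const int kc = std::min(k - p, KC_BLK);
      const bool last = (p + KC_BLK >= k);  // fuse ReLU on last panel only
      for (int ii = 0; ii < mc; ++ii)
        for (int j = 0; j < n; ++j) {
          float acc = (p == 0) ? 0.f : C[(i + ii) * ldc + j];
          for (int pp = 0; pp < kc; ++pp)
            acc += A[(i + ii) * lda + p + pp] * B[(p + pp) * ldb + j];
          C[(i + ii) * ldc + j] = last ? std::max(acc, 0.f) : acc;
        }
    }
  }
}
```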
@@ -58,7 +58,8 @@ void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B,
                int ldb, float beta, float *C, int ldc, int mc, int nc);
 void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
-                    int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu);
+                    int ldb, float beta, float *C, int ldc, int mc, int nc,
+                    bool relu);
 // 32-bit float matrix multiplication
 void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
@@ -42,19 +42,19 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
   int K = (trans_a == false) ? dim_a[1] : dim_a[0];
   if (relu) {
-    sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
-               beta, matrix_out->data<float>(), N);
+    sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K,
+               matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N);
   } else {
     sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
           beta, matrix_out->data<float>(), N);
   }
 }
 template <>
 void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
                     const framework::Tensor &matrix_b, bool trans_b,
-                    double alpha, framework::Tensor *matrix_out, double beta, bool relu) {
+                    double alpha, framework::Tensor *matrix_out, double beta,
+                    bool relu) {
   auto dim_a = matrix_a.dims();
   auto dim_b = matrix_b.dims();
   auto dim_out = matrix_out->dims();
@@ -74,8 +74,6 @@ void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
   int K = (trans_a == false) ? dim_a[1] : dim_a[0];
 }
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
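The `matmul<float>` wrapper above derives M, N, K from the tensor dims and trans flags and forwards row-major data with lda = K, ldb = N, ldc = N. The reference semantics of the `sgemm`/`sgemm_relu` calls, as an unoptimized sketch (the real code is blocked and vectorized):

```cpp
// C = alpha * A * B + beta * C for row-major matrices, no transposition;
// the _relu variant additionally clamps each result at zero.
void sgemm_ref(int m, int n, int k, float alpha, const float *A, int lda,
               const float *B, int ldb, float beta, float *C, int ldc) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) acc += A[i * lda + p] * B[p * ldb + j];
      C[i * ldc + j] = alpha * acc + beta * C[i * ldc + j];
    }
}
```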
@@ -839,12 +839,12 @@ Print &operator<<(Print &printer, const FushionConvAddParam &conv_param);
 #endif
 #ifdef FUSION_CONVADD_RELU_OP
-class FushionConvAddReluParam: public FushionConvAddParam {
+class FushionConvAddReluParam : public FushionConvAddParam {
  public:
   FushionConvAddReluParam(const VariableNameMap &inputs,
-                          const VariableNameMap &outputs, const AttributeMap &attrs,
-                          const Scope &scope): FushionConvAddParam(inputs, outputs, attrs, scope) {
-  }
+                          const VariableNameMap &outputs,
+                          const AttributeMap &attrs, const Scope &scope)
+      : FushionConvAddParam(inputs, outputs, attrs, scope) {}
 };
 #endif
......
@@ -42,7 +42,8 @@ using std::vector;
 template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
  public:
-  Executor4Test(Program<DeviceType> p, string op_type, bool use_optimize = false)
+  Executor4Test(Program<DeviceType> p, string op_type,
+                bool use_optimize = false)
       : Executor<DeviceType>() {
     this->use_optimize_ = use_optimize;
     this->program_ = p;
@@ -62,16 +63,14 @@ class Executor4Test : public Executor<DeviceType> {
     std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
     for (std::shared_ptr<OpDesc> op : ops) {
       if (op->Type() == op_type) {
         DLOG << "matched: " << op->Type();
         /// test first meeting op in program
         std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
-            op_ptr = paddle_mobile::framework::OpRegistry<
-                DeviceType>::CreateOp(op->Type(), op->GetInputs(),
-                                      op->GetOutputs(),
-                                      op->GetAttrMap(),
-                                      this->program_.scope);
+            op_ptr =
+                paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
+                    op->Type(), op->GetInputs(), op->GetOutputs(),
+                    op->GetAttrMap(), this->program_.scope);
         this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
         break;
       }
@@ -20,8 +20,9 @@ int main() {
   // ../../../test/models/googlenet
   // ../../../test/models/mobilenet
   auto program = loader.Load(g_mobilenet_ssd, false, false);
-  // auto program = loader.Load(g_googlenet_combine + "/model", g_googlenet_combine +
-  // "/params", true);
+  // auto program = loader.Load(g_googlenet_combine + "/model",
+  // g_googlenet_combine +
+  // "/params", true);
   // program.originProgram->Description("program desc: ");
   return 0;
......
@@ -23,7 +23,8 @@ int main() {
   PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
                         "program file read fail");
-  Executor4Test<paddle_mobile::CPU,
+  Executor4Test<
+      paddle_mobile::CPU,
       paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>>
       executor(program, "fusion_conv_add_relu", true);
......
@@ -23,8 +23,8 @@ int main() {
   PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
                         "program file read fail");
-  Executor4Test<paddle_mobile::GPU_MALI,
-                paddle_mobile::operators::ConvOp<paddle_mobile::GPU_MALI, float>>
+  Executor4Test<paddle_mobile::GPU_MALI, paddle_mobile::operators::ConvOp<
+                                             paddle_mobile::GPU_MALI, float>>
       executor(program, "conv2d");
   paddle_mobile::framework::Tensor input;
......