提交 4a493ead 编写于 作者: L liuruilong

format files

上级 d4c0395f
...@@ -17,8 +17,8 @@ limitations under the License. */ ...@@ -17,8 +17,8 @@ limitations under the License. */
#include "operators/conv_op.h" #include "operators/conv_op.h"
#include <vector> #include <vector>
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "operators/math/conv_func.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const { ...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(
dilations[i], paddings[i], math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
strides[i])); paddings[i], strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -17,9 +17,9 @@ limitations under the License. */ ...@@ -17,9 +17,9 @@ limitations under the License. */
#include "operators/depthwise_conv_op.h" #include "operators/depthwise_conv_op.h"
#include <vector> #include <vector>
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "operators/math/conv_func.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/conv_op.h" #include "operators/conv_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const { ...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(
dilations[i], paddings[i], math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
strides[i])); paddings[i], strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -14,8 +14,8 @@ limitations under the License. */ ...@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP #ifdef FUSION_CONVADD_OP
#include "operators/math/conv_func.h"
#include "operators/fusion_conv_add.h" #include "operators/fusion_conv_add.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FushionConvAddOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FushionConvAddOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(
dilations[i], paddings[i], math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
strides[i])); paddings[i], strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -18,10 +18,10 @@ limitations under the License. */ ...@@ -18,10 +18,10 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "op_param.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/conv_add_kernel.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
......
...@@ -36,16 +36,16 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const { ...@@ -36,16 +36,16 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(
dilations[i], paddings[i], math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
strides[i])); paddings[i], strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim); this->param_.Output()->Resize(ddim);
} }
} } // namespace operators
} } // namespace paddle_mobile
namespace ops = paddle_mobile::operators; namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
......
...@@ -17,9 +17,9 @@ limitations under the License. */ ...@@ -17,9 +17,9 @@ limitations under the License. */
#pragma once #pragma once
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h"
#include "operators/kernel/conv_add_relu_kernel.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -42,27 +42,30 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher { ...@@ -42,27 +42,30 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
}; };
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class FusionConvAddReluOp: public framework::OperatorWithKernel< class FusionConvAddReluOp : public framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam, DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>> { operators::ConvAddReluKernel<DeviceType, T>> {
public: public:
FusionConvAddReluOp(const string &type, const VariableNameMap &inputs, FusionConvAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FushionConvAddReluParam, : framework::OperatorWithKernel<
operators::ConvAddReluKernel<DeviceType, T>>( DeviceType, FushionConvAddReluParam,
type, inputs, outputs, attrs, scope) {} operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel< using framework::OperatorWithKernel<
DeviceType, FushionConvAddReluParam, DeviceType, FushionConvAddReluParam,
operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel; operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
}; };
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
//static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new FushionConvAddReluOpMatcher()); // static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
// FushionConvAddReluOpMatcher());
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
#endif #endif
......
...@@ -70,7 +70,7 @@ class FushionFcOp : public framework::OperatorWithKernel< ...@@ -70,7 +70,7 @@ class FushionFcOp : public framework::OperatorWithKernel<
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
//static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); // static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -50,7 +50,8 @@ void ConvAddKernel<CPU, float>::Compute( ...@@ -50,7 +50,8 @@ void ConvAddKernel<CPU, float>::Compute(
framework::DDim col_matrix_shape = framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1); framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations); bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col; Tensor col;
Tensor col_matrix; Tensor col_matrix;
if (is_expand) { if (is_expand) {
......
...@@ -21,7 +21,7 @@ namespace operators { ...@@ -21,7 +21,7 @@ namespace operators {
template <> template <>
void ConvAddReluKernel<CPU, float>::Compute( void ConvAddReluKernel<CPU, float>::Compute(
const FushionConvAddReluParam &param) const { const FushionConvAddReluParam &param) const {
const Tensor *input = param.Input(); const Tensor *input = param.Input();
Tensor filter = *param.Filter(); Tensor filter = *param.Filter();
Tensor bias = *param.Bias(); Tensor bias = *param.Bias();
...@@ -49,9 +49,10 @@ void ConvAddReluKernel<CPU, float>::Compute( ...@@ -49,9 +49,10 @@ void ConvAddReluKernel<CPU, float>::Compute(
framework::DDim col_shape(framework::make_ddim(col_shape_vec)); framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape = framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1); framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations); bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col; Tensor col;
Tensor col_matrix; Tensor col_matrix;
if (is_expand) { if (is_expand) {
...@@ -61,14 +62,14 @@ void ConvAddReluKernel<CPU, float>::Compute( ...@@ -61,14 +62,14 @@ void ConvAddReluKernel<CPU, float>::Compute(
} }
framework::DDim input_shape = framework::slice_ddim( framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size())); input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0], framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]}; filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = { framework::DDim output_matrix_shape = {
output->dims()[1], output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])}; output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm // convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups; int in_step = static_cast<int>(input->dims()[1]) / groups;
...@@ -105,13 +106,12 @@ void ConvAddReluKernel<CPU, float>::Compute( ...@@ -105,13 +106,12 @@ void ConvAddReluKernel<CPU, float>::Compute(
math::matmul<float>(filter_slice, false, col_matrix, false, math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice, static_cast<float>(1), &out_slice,
static_cast<float>(1), true); static_cast<float>(1), true);
} }
} }
} }
template class ConvAddReluKernel<CPU, float>; template class ConvAddReluKernel<CPU, float>;
} } // namespace operators
} } // namespace paddle_mobile
#endif #endif
...@@ -21,12 +21,12 @@ limitations under the License. */ ...@@ -21,12 +21,12 @@ limitations under the License. */
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "framework/ddim.h" #include "framework/ddim.h"
#include "operators/op_param.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/vol2col.h"
#include "operators/math/conv_func.h" #include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h" #include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -34,7 +34,6 @@ namespace operators { ...@@ -34,7 +34,6 @@ namespace operators {
using framework::DDim; using framework::DDim;
using framework::OpKernelBase; using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class ConvAddKernel : public OpKernelBase<DeviceType, FushionConvAddParam> { class ConvAddKernel : public OpKernelBase<DeviceType, FushionConvAddParam> {
public: public:
......
...@@ -19,11 +19,11 @@ limitations under the License. */ ...@@ -19,11 +19,11 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/ddim.h" #include "framework/ddim.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h"
#include "operators/math/im2col.h"
#include "operators/math/vol2col.h"
#include "operators/math/conv_func.h" #include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h" #include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -32,7 +32,8 @@ using framework::DDim; ...@@ -32,7 +32,8 @@ using framework::DDim;
using framework::OpKernelBase; using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class ConvAddReluKernel : public OpKernelBase<DeviceType, FushionConvAddReluParam> { class ConvAddReluKernel
: public OpKernelBase<DeviceType, FushionConvAddReluParam> {
public: public:
void Compute(const FushionConvAddReluParam &param) const; void Compute(const FushionConvAddReluParam &param) const;
}; };
......
...@@ -22,8 +22,8 @@ namespace paddle_mobile { ...@@ -22,8 +22,8 @@ namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
void BatchNormKernel<GPU_MALI, float>::Compute(const BatchNormParam &param) const { void BatchNormKernel<GPU_MALI, float>::Compute(
} const BatchNormParam &param) const {}
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -42,7 +42,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) { ...@@ -42,7 +42,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
"the bias tensor's dims size != 1") "the bias tensor's dims size != 1")
DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1); DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1);
DDim inner_ddim = DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size()); paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
int outer_size = paddle_mobile::framework::product(outer_ddim); int outer_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim); int inner_size = paddle_mobile::framework::product(inner_ddim);
bias.Resize(dDim); bias.Resize(dDim);
...@@ -98,6 +98,6 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim, ...@@ -98,6 +98,6 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
return !(filter_1 && strides_1 && padding_0 && dilation_1); return !(filter_1 && strides_1 && padding_0 && dilation_1);
} }
} } // namespace math
} } // namespace operators
} } // namespace paddle_mobile
...@@ -177,8 +177,8 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -177,8 +177,8 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
// 分块矩阵乘法 // 分块矩阵乘法
void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda, void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc, const float *B, int ldb, float beta, float *C, int ldc,
int first_time, bool relu = false) { int first_time, bool relu = false) {
int Buff_A_M = m; int Buff_A_M = m;
int Buff_B_N = n; int Buff_B_N = n;
...@@ -210,12 +210,11 @@ void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -210,12 +210,11 @@ void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
for (i = 0; i < Buff_A_M; i += MR) { for (i = 0; i < Buff_A_M; i += MR) {
mc = (m - i) < MR ? _mc : MR; mc = (m - i) < MR ? _mc : MR;
AddDot4x4_relu(k, alpha, &packedA[i * k], 4, &packedB[j * k], k, beta, AddDot4x4_relu(k, alpha, &packedA[i * k], 4, &packedB[j * k], k, beta,
&C(i, j), ldc, mc, nc, relu); &C(i, j), ldc, mc, nc, relu);
} }
} }
} }
//计算一个更小的 4 * 4 的 C 矩阵分块 //计算一个更小的 4 * 4 的 C 矩阵分块
#if defined(IOS) #if defined(IOS)
void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
...@@ -269,8 +268,9 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, ...@@ -269,8 +268,9 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
} }
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) { int ldb, float beta, float *C, int ldc, int mc, int nc,
// init C bool relu = false) {
// init C
float32x4_t cv0 = vdupq_n_f32(0.0); float32x4_t cv0 = vdupq_n_f32(0.0);
float32x4_t cv1 = vdupq_n_f32(0.0); float32x4_t cv1 = vdupq_n_f32(0.0);
float32x4_t cv2 = vdupq_n_f32(0.0); float32x4_t cv2 = vdupq_n_f32(0.0);
...@@ -458,7 +458,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, ...@@ -458,7 +458,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
} }
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) { int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu = false) {
int kc1 = k / 2, kc2 = k % 2; int kc1 = k / 2, kc2 = k % 2;
int bytes_ldc = 4 * ldc; int bytes_ldc = 4 * ldc;
int flag_alpha = (alpha == 1.0) ? 1 : 2; int flag_alpha = (alpha == 1.0) ? 1 : 2;
...@@ -571,8 +572,8 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, ...@@ -571,8 +572,8 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
[kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha), [kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha),
[beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc), [beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc),
[flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta) [flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta)
: "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13", "q14"); : "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13",
"q14");
if (mc != MR || nc != NR) { if (mc != MR || nc != NR) {
int i, j; int i, j;
...@@ -599,7 +600,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, ...@@ -599,7 +600,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
C(i, j) = 0; C(i, j) = 0;
} }
} }
} }
} }
} }
...@@ -664,7 +664,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, ...@@ -664,7 +664,8 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
} }
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu) { int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu) {
float c[16] = {0}; float c[16] = {0};
float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3; float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3;
...@@ -725,7 +726,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, ...@@ -725,7 +726,6 @@ void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
} }
} }
#endif #endif
// 32位 float 矩阵乘法 // 32位 float 矩阵乘法
...@@ -768,11 +768,11 @@ void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -768,11 +768,11 @@ void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
} }
if (p + KC >= k) { if (p + KC >= k) {
InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_, InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb,
&C(i, j), ldc, i == 0, true); beta_, &C(i, j), ldc, i == 0, true);
} else { } else {
InnerKernel(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_, InnerKernel(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_,
&C(i, j), ldc, i == 0); &C(i, j), ldc, i == 0);
} }
} }
} }
......
...@@ -58,14 +58,15 @@ void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B, ...@@ -58,14 +58,15 @@ void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B,
int ldb, float beta, float *C, int ldc, int mc, int nc); int ldb, float beta, float *C, int ldc, int mc, int nc);
void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu); int ldb, float beta, float *C, int ldc, int mc, int nc,
bool relu);
// 32位 float 矩阵乘法 // 32位 float 矩阵乘法
void sgemm(int m, int n, int k, float alpha, const float *A, int lda, void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc); const float *B, int ldb, float beta, float *C, int ldc);
void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda, void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc); const float *B, int ldb, float beta, float *C, int ldc);
// 64位 double 矩阵乘法 // 64位 double 矩阵乘法
void dgemm(int m, int n, int k, float alpha, const double *A, int lda, void dgemm(int m, int n, int k, float alpha, const double *A, int lda,
......
...@@ -42,19 +42,19 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a, ...@@ -42,19 +42,19 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
int K = (trans_a == false) ? dim_a[1] : dim_a[0]; int K = (trans_a == false) ? dim_a[1] : dim_a[0];
if (relu) { if (relu) {
sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N, sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K,
beta, matrix_out->data<float>(), N); matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N);
} else { } else {
sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N, sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
beta, matrix_out->data<float>(), N); beta, matrix_out->data<float>(), N);
} }
} }
template <> template <>
void matmul<double>(const framework::Tensor &matrix_a, bool trans_a, void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b, const framework::Tensor &matrix_b, bool trans_b,
double alpha, framework::Tensor *matrix_out, double beta, bool relu) { double alpha, framework::Tensor *matrix_out, double beta,
bool relu) {
auto dim_a = matrix_a.dims(); auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims(); auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims(); auto dim_out = matrix_out->dims();
...@@ -74,8 +74,6 @@ void matmul<double>(const framework::Tensor &matrix_a, bool trans_a, ...@@ -74,8 +74,6 @@ void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
int K = (trans_a == false) ? dim_a[1] : dim_a[0]; int K = (trans_a == false) ? dim_a[1] : dim_a[0];
} }
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -839,12 +839,12 @@ Print &operator<<(Print &printer, const FushionConvAddParam &conv_param); ...@@ -839,12 +839,12 @@ Print &operator<<(Print &printer, const FushionConvAddParam &conv_param);
#endif #endif
#ifdef FUSION_CONVADD_RELU_OP #ifdef FUSION_CONVADD_RELU_OP
class FushionConvAddReluParam: public FushionConvAddParam { class FushionConvAddReluParam : public FushionConvAddParam {
public: public:
FushionConvAddReluParam(const VariableNameMap &inputs, FushionConvAddReluParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs,
const Scope &scope): FushionConvAddParam(inputs, outputs, attrs, scope) { const AttributeMap &attrs, const Scope &scope)
} : FushionConvAddParam(inputs, outputs, attrs, scope) {}
}; };
#endif #endif
......
...@@ -42,7 +42,8 @@ using std::vector; ...@@ -42,7 +42,8 @@ using std::vector;
template <typename DeviceType, typename OpType> template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> { class Executor4Test : public Executor<DeviceType> {
public: public:
Executor4Test(Program<DeviceType> p, string op_type, bool use_optimize = false) Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false)
: Executor<DeviceType>() { : Executor<DeviceType>() {
this->use_optimize_ = use_optimize; this->use_optimize_ = use_optimize;
this->program_ = p; this->program_ = p;
...@@ -62,16 +63,14 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -62,16 +63,14 @@ class Executor4Test : public Executor<DeviceType> {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (std::shared_ptr<OpDesc> op : ops) { for (std::shared_ptr<OpDesc> op : ops) {
if (op->Type() == op_type) { if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type(); DLOG << "匹配到: " << op->Type();
/// test first meeting op in program /// test first meeting op in program
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>> std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr = paddle_mobile::framework::OpRegistry< op_ptr =
DeviceType>::CreateOp(op->Type(), op->GetInputs(), paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->GetOutputs(), op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), op->GetAttrMap(), this->program_.scope);
this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr); this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break; break;
} }
......
...@@ -20,9 +20,10 @@ int main() { ...@@ -20,9 +20,10 @@ int main() {
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
auto program = loader.Load(g_mobilenet_ssd, false, false); auto program = loader.Load(g_mobilenet_ssd, false, false);
// auto program = loader.Load(g_googlenet_combine + "/model", g_googlenet_combine + // auto program = loader.Load(g_googlenet_combine + "/model",
// "/params", true); // g_googlenet_combine +
// "/params", true);
// program.originProgram->Description("program desc: "); // program.originProgram->Description("program desc: ");
return 0; return 0;
} }
...@@ -23,9 +23,10 @@ int main() { ...@@ -23,9 +23,10 @@ int main() {
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
Executor4Test<paddle_mobile::CPU, Executor4Test<
paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>> paddle_mobile::CPU,
executor(program, "fusion_conv_add_relu", true); paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>>
executor(program, "fusion_conv_add_relu", true);
paddle_mobile::framework::Tensor input; paddle_mobile::framework::Tensor input;
GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
......
...@@ -23,8 +23,8 @@ int main() { ...@@ -23,8 +23,8 @@ int main() {
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
Executor4Test<paddle_mobile::GPU_MALI, Executor4Test<paddle_mobile::GPU_MALI, paddle_mobile::operators::ConvOp<
paddle_mobile::operators::ConvOp<paddle_mobile::GPU_MALI, float>> paddle_mobile::GPU_MALI, float>>
executor(program, "conv2d"); executor(program, "conv2d");
paddle_mobile::framework::Tensor input; paddle_mobile::framework::Tensor input;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册