Commit 24b0736b authored by liuruilong

format files

Parent 06be382a
@@ -70,7 +70,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_CPU
 #ifndef CONV_ADD_REGISTER
 static framework::FusionOpRegistrar convadd_registrar(
     new FusionConvAddMatcher());
 #define CONV_ADD_REGISTER
 #endif
 #endif
@@ -79,7 +79,7 @@ static framework::FusionOpRegistrar convadd_registrar(
 #ifndef CONV_ADD_REGISTER
 static framework::FusionOpRegistrar convadd_registrar(
     new FusionConvAddMatcher());
 #define CONV_ADD_REGISTER
 #endif
...
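Both hunks above come from the formatting pass, so the two sides differ only in layout. For context, the `#ifndef CONV_ADD_REGISTER` guard keeps a translation unit that reaches this header through more than one backend section from defining a second `convadd_registrar`, which would both break the build (redefinition) and register the conv+add fusion matcher twice. Below is a minimal sketch of the static-registration idiom these guards protect; `Matcher` and `MatcherRegistry` are illustrative stand-ins, not paddle-mobile's actual types.

```cpp
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for a fusion matcher; paddle-mobile's real
// FusionOpMatcher interface is richer than this.
struct Matcher {
  virtual ~Matcher() = default;
  virtual std::string Type() const = 0;
};

// Global registry keyed by fused-op type name.
std::map<std::string, std::unique_ptr<Matcher>> &MatcherRegistry() {
  static std::map<std::string, std::unique_ptr<Matcher>> registry;
  return registry;
}

// A namespace-scope static of this type registers its matcher as a side
// effect of static initialization, i.e. before main() runs. This is the
// idiom behind `static framework::FusionOpRegistrar convadd_registrar(...)`.
struct FusionOpRegistrar {
  explicit FusionOpRegistrar(Matcher *matcher) {
    MatcherRegistry().emplace(matcher->Type(),
                              std::unique_ptr<Matcher>(matcher));
  }
};
```

Without the `#ifndef`/`#define` pair, including the header from both the CPU and GPU sections of one source file would instantiate the same static twice.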
@@ -21,7 +21,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-template<typename P>
+template <typename P>
 void BatchnormCompute(const BatchNormParam &param) {
   const Tensor *input_x = param.InputX();
   auto input_x_ptr = input_x->data<float>();
@@ -63,7 +63,7 @@ void BatchnormCompute(const BatchNormParam &param) {
     for (int i = 0; i < C * 4; i += 4) {
       int index = i / 4;
       inv_std_ptr[i] =
           1 / static_cast<float>(pow((variance_ptr[index] + epsilon), 0.5));
       inv_std_ptr[i + 1] = inv_std_ptr[i];
       inv_std_ptr[i + 2] = inv_std_ptr[i];
       inv_std_ptr[i + 3] = inv_std_ptr[i];
@@ -74,7 +74,7 @@ void BatchnormCompute(const BatchNormParam &param) {
       new_scale_ptr[i + 3] = new_scale_ptr[i];
       new_bias_ptr[i] =
           bias_ptr[index] - mean_ptr[index] * inv_std_ptr[i] * scale_ptr[index];
       new_bias_ptr[i + 1] = new_bias_ptr[i];
       new_bias_ptr[i + 2] = new_bias_ptr[i];
@@ -87,105 +87,105 @@ void BatchnormCompute(const BatchNormParam &param) {
     }
     asm volatile(
         "subs %[N], %[N], #1                    \n\t"
         "blt end_n_%=                           \n\t"
         "loop_n_%=:                             \n\t"

         "subs %[C], %[C], #1                    \n\t"
         "blt end_c_%=                           \n\t"
         "loop_c_%=:                             \n\t"

         "vld1.32 {q9}, [%[new_scale_ptr]]!      \n\t"
         "vld1.32 {q10}, [%[new_bias_ptr]]!      \n\t"
         "mov r6, %[HXW]                         \n\t"
         "subs r6, r6, #32                       \n\t"
         "blt end_hw_%=                          \n\t"
         "loop_hw_%=:                            \n\t"

         "vld1.32 {q1, q2}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q3, q4}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q5, q6}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q7, q8}, [%[input_x_ptr]]!    \n\t"

         "vmul.f32 q1, q1, q9                    \n\t"
         "vmul.f32 q2, q2, q9                    \n\t"
         "vmul.f32 q3, q3, q9                    \n\t"
         "vmul.f32 q4, q4, q9                    \n\t"
         "vmul.f32 q5, q5, q9                    \n\t"
         "vmul.f32 q6, q6, q9                    \n\t"
         "vmul.f32 q7, q7, q9                    \n\t"
         "vmul.f32 q8, q8, q9                    \n\t"

         "vadd.f32 q1, q1, q10                   \n\t"
         "vadd.f32 q2, q2, q10                   \n\t"
         "vadd.f32 q3, q3, q10                   \n\t"
         "vadd.f32 q4, q4, q10                   \n\t"
         "vadd.f32 q5, q5, q10                   \n\t"
         "vadd.f32 q6, q6, q10                   \n\t"
         "vadd.f32 q7, q7, q10                   \n\t"
         "vadd.f32 q8, q8, q10                   \n\t"

         "vst1.32 {q1, q2}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q3, q4}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q5, q6}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q7, q8}, [%[out_ptr]]!        \n\t"

         "subs r6, r6, #32                       \n\t"
         "bge loop_hw_%=                         \n\t"
         "end_hw_%=:                             \n\t"

         "cmp r6, #0                             \n\t"
         "bge end_remainder_%=                   \n\t"
         "mov r5, #4                             \n\t"
         "mul r6, r6, r5                         \n\t"
         "add %[input_x_ptr], %[input_x_ptr], r6 \n\t"

         "vld1.32 {q1, q2}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q3, q4}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q5, q6}, [%[input_x_ptr]]!    \n\t"
         "vld1.32 {q7, q8}, [%[input_x_ptr]]!    \n\t"

         "vmul.f32 q1, q1, q9                    \n\t"
         "vmul.f32 q2, q2, q9                    \n\t"
         "vmul.f32 q3, q3, q9                    \n\t"
         "vmul.f32 q4, q4, q9                    \n\t"
         "vmul.f32 q5, q5, q9                    \n\t"
         "vmul.f32 q6, q6, q9                    \n\t"
         "vmul.f32 q7, q7, q9                    \n\t"
         "vmul.f32 q8, q8, q9                    \n\t"

         "vadd.f32 q1, q1, q10                   \n\t"
         "vadd.f32 q2, q2, q10                   \n\t"
         "vadd.f32 q3, q3, q10                   \n\t"
         "vadd.f32 q4, q4, q10                   \n\t"
         "vadd.f32 q5, q5, q10                   \n\t"
         "vadd.f32 q6, q6, q10                   \n\t"
         "vadd.f32 q7, q7, q10                   \n\t"
         "vadd.f32 q8, q8, q10                   \n\t"

         "add %[out_ptr], %[out_ptr], r6         \n\t"
         "vst1.32 {q1, q2}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q3, q4}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q5, q6}, [%[out_ptr]]!        \n\t"
         "vst1.32 {q7, q8}, [%[out_ptr]]!        \n\t"
         "end_remainder_%=:                      \n\t"

         "subs %[C], %[C], #1                    \n\t"
         "bge loop_c_%=                          \n\t"
         "end_c_%=:                              \n\t"

         "subs %[N], %[N], #1                    \n\t"
         "bge loop_n_%=                          \n\t"
         "end_n_%=:                              \n\t"
         :
         : [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
           [new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
           [N] "r"(N), [C] "r"(C), [HXW] "r"(HXW)
         : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
           "q10", "r5", "r6");

     delete[] inv_std_ptr;
     delete[] new_scale_ptr;
@@ -195,11 +195,12 @@ void BatchnormCompute(const BatchNormParam &param) {
     float *inv_std_ptr = new float[C];
     for (int i = 0; i < C; i++) {
       inv_std_ptr[i] =
           1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
     }
     Tensor new_scale;
-    auto new_scale_ptr = new_scale.mutable_data<float>(framework::make_ddim({C}));
+    auto new_scale_ptr =
+        new_scale.mutable_data<float>(framework::make_ddim({C}));
     Tensor new_bias;
     auto new_bias_ptr = new_bias.mutable_data<float>(framework::make_ddim({C}));
@@ -208,7 +209,7 @@ void BatchnormCompute(const BatchNormParam &param) {
     for (int i = 0; i < C; i++) {
       new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
       new_bias_ptr[i] =
           bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
       {
         for (int n = 0; n < N; n++) {
           for (int h = 0; h < H; h++) {
@@ -216,7 +217,7 @@ void BatchnormCompute(const BatchNormParam &param) {
             for (int w = 0; w < W; w++) {
               int index = tmp_index + w;
               out_ptr[index] =
                   input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
             }
           }
         }
@@ -227,7 +228,7 @@ void BatchnormCompute(const BatchNormParam &param) {
   }
 }
-}
-}
+}  // namespace operators
+}  // namespace paddle_mobile
 #endif
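Both paths in this kernel (the ARM NEON block and the scalar fallback) fold the batch-norm statistics into a per-channel scale and bias first, so the per-element work is a single multiply-add: `new_scale = scale / sqrt(variance + epsilon)`, `new_bias = bias - mean * new_scale`, then `out = in * new_scale + new_bias`. The NEON path additionally replicates each per-channel scalar four times so one `vld1.32` fills a whole q register, and when `HXW` is not a multiple of 32 it steps the pointers back by the (negative) remainder so the final 32-float block overlaps already-written output instead of running past the buffer. A plain-C++ reference of the same folding, with raw float buffers standing in for paddle-mobile `Tensor`s, might look like this sketch:

```cpp
#include <cmath>
#include <vector>

// Scalar reference for the folding both code paths perform: precompute a
// per-channel scale/bias pair once, then apply y = x * scale + bias to each
// element of an NCHW tensor (HxW = H * W).
void BatchNormReference(const float *x, float *y, int N, int C, int HxW,
                        const float *mean, const float *variance,
                        const float *gamma, const float *beta, float epsilon) {
  std::vector<float> new_scale(C), new_bias(C);
  for (int c = 0; c < C; ++c) {
    float inv_std = 1.0f / std::sqrt(variance[c] + epsilon);
    new_scale[c] = gamma[c] * inv_std;
    new_bias[c] = beta[c] - mean[c] * new_scale[c];  // mean folded into bias
  }
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int i = 0; i < HxW; ++i) {
        int idx = (n * C + c) * HxW + i;
        y[idx] = x[idx] * new_scale[c] + new_bias[c];
      }
}
```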
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-template<typename P>
+template <typename P>
 void ConvAddReluCompute(const FusionConvAddReluParam &param) {
   const Tensor *input = param.Input();
   Tensor filter = *param.Filter();
@@ -49,10 +49,10 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
   framework::DDim col_shape(framework::make_ddim(col_shape_vec));
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);

   bool is_expand =
       math::IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
   Tensor col_matrix;
   if (is_expand) {
@@ -62,14 +62,14 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
   }
   framework::DDim input_shape = framework::slice_ddim(
       input->dims(), 1, static_cast<int>(input->dims().size()));

   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
   framework::DDim output_matrix_shape = {
       output->dims()[1],
       output->numel() / (output->dims()[0] * output->dims()[1])};

   // convolution operator: im2col(or vol2col) + gemm
   int in_step = static_cast<int>(input->dims()[1]) / groups;
@@ -110,7 +110,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
     }
   }
 }
-}
-}
+}  // namespace operators
+}  // namespace paddle_mobile
 #endif
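As the `// convolution operator: im2col(or vol2col) + gemm` comment says, the kernel flattens the filter to a `[C_out, C_in * kh * kw]` matrix and views the output as `[C_out, oh * ow]`, expands input patches into columns, and runs one gemm per group; in this fused kernel the bias add and the relu are then applied to the gemm result. A naive single-group sketch of the scheme (unit dilation; `Im2ColGemmBiasRelu` is a hypothetical helper, not the library's API):

```cpp
#include <algorithm>
#include <vector>

// Naive sketch of im2col + gemm with fused bias add and relu. Shapes match
// the code above: filter is [C_out, C_in*kh*kw], output is [C_out, oh*ow].
void Im2ColGemmBiasRelu(const float *in, int C_in, int H, int W,
                        const float *filter, int C_out, int kh, int kw,
                        const float *bias, float *out, int stride, int pad) {
  int oh = (H + 2 * pad - kh) / stride + 1;
  int ow = (W + 2 * pad - kw) / stride + 1;
  int K = C_in * kh * kw;
  // im2col: each output pixel becomes one column of a [K, oh*ow] matrix.
  std::vector<float> col(static_cast<size_t>(K) * oh * ow, 0.0f);
  for (int c = 0; c < C_in; ++c)
    for (int i = 0; i < kh; ++i)
      for (int j = 0; j < kw; ++j)
        for (int y = 0; y < oh; ++y)
          for (int x = 0; x < ow; ++x) {
            int iy = y * stride - pad + i, ix = x * stride - pad + j;
            if (iy >= 0 && iy < H && ix >= 0 && ix < W)
              col[((c * kh + i) * kw + j) * oh * ow + y * ow + x] =
                  in[(c * H + iy) * W + ix];
          }
  // gemm: out = filter * col, with the elementwise add and relu fused in.
  for (int m = 0; m < C_out; ++m)
    for (int p = 0; p < oh * ow; ++p) {
      float acc = bias[m];
      for (int k = 0; k < K; ++k)
        acc += filter[m * K + k] * col[k * oh * ow + p];
      out[m * oh * ow + p] = std::max(acc, 0.0f);
    }
}
```

For grouped convolution, the `in_step`/`out_step` values above slice the channel dimension so each group runs this scheme on its own slice of the input, filter, and output.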
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-template<typename P>
+template <typename P>
 void ConvCompute(const ConvParam &param) {
   const Tensor *input = param.Input();
   Tensor filter = *param.Filter();
@@ -46,7 +46,7 @@ void ConvCompute(const ConvParam &param) {
   framework::DDim col_shape(framework::make_ddim(col_shape_vec));
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);

   bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
@@ -58,14 +58,14 @@ void ConvCompute(const ConvParam &param) {
   }
   framework::DDim input_shape = framework::slice_ddim(
       input->dims(), 1, static_cast<int>(input->dims().size()));

   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
   framework::DDim output_matrix_shape = {
       output->dims()[1],
       output->numel() / (output->dims()[0] * output->dims()[1])};

   // convolution operator: im2col(or vol2col) + gemm
   int in_step = static_cast<int>(input->dims()[1]) / groups;
@@ -106,7 +106,7 @@ void ConvCompute(const ConvParam &param) {
     }
   }
 }
-}
-}
+}  // namespace operators
+}  // namespace paddle_mobile
 #endif
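`ConvCompute` takes the same im2col + gemm path; the branch visible above is `is_expand`, which decides whether the expansion is needed at all. The body of `IsExpand` is not part of this diff, so the following is a hedged reconstruction of the usual check rather than the diffed source: a 1x1 filter with unit strides, zero padding, and unit dilations means the gemm can read the input tensor directly and the im2col step can be skipped.

```cpp
#include <vector>

// Hedged reconstruction of an IsExpand-style predicate. `filter_spatial`
// holds the filter's spatial dims ({kh, kw}), aligned with strides/paddings/
// dilations. Returns true when the input genuinely needs im2col expansion.
bool IsExpandSketch(const std::vector<int> &filter_spatial,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings,
                    const std::vector<int> &dilations) {
  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
  for (size_t i = 0; i < strides.size(); ++i) {
    filter_1 = filter_1 && (filter_spatial[i] == 1);
    strides_1 = strides_1 && (strides[i] == 1);
    padding_0 = padding_0 && (paddings[i] == 0);
    dilation_1 = dilation_1 && (dilations[i] == 1);
  }
  return !(filter_1 && strides_1 && padding_0 && dilation_1);
}
```

This is consistent with the hunks above, where `col` is only set up inside `if (is_expand)`.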