提交 24b0736b 编写于 作者: L liuruilong

format files

上级 06be382a
......@@ -70,7 +70,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_ADD_REGISTER
static framework::FusionOpRegistrar convadd_registrar(
new FusionConvAddMatcher());
new FusionConvAddMatcher());
#define CONV_ADD_REGISTER
#endif
#endif
......@@ -79,7 +79,7 @@ static framework::FusionOpRegistrar convadd_registrar(
#ifndef CONV_ADD_REGISTER
static framework::FusionOpRegistrar convadd_registrar(
new FusionConvAddMatcher());
new FusionConvAddMatcher());
#define CONV_ADD_REGISTER
#endif
......
......@@ -21,7 +21,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template<typename P>
template <typename P>
void BatchnormCompute(const BatchNormParam &param) {
const Tensor *input_x = param.InputX();
auto input_x_ptr = input_x->data<float>();
......@@ -63,7 +63,7 @@ void BatchnormCompute(const BatchNormParam &param) {
for (int i = 0; i < C * 4; i += 4) {
int index = i / 4;
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[index] + epsilon), 0.5));
1 / static_cast<float>(pow((variance_ptr[index] + epsilon), 0.5));
inv_std_ptr[i + 1] = inv_std_ptr[i];
inv_std_ptr[i + 2] = inv_std_ptr[i];
inv_std_ptr[i + 3] = inv_std_ptr[i];
......@@ -74,7 +74,7 @@ void BatchnormCompute(const BatchNormParam &param) {
new_scale_ptr[i + 3] = new_scale_ptr[i];
new_bias_ptr[i] =
bias_ptr[index] - mean_ptr[index] * inv_std_ptr[i] * scale_ptr[index];
bias_ptr[index] - mean_ptr[index] * inv_std_ptr[i] * scale_ptr[index];
new_bias_ptr[i + 1] = new_bias_ptr[i];
new_bias_ptr[i + 2] = new_bias_ptr[i];
......@@ -87,105 +87,105 @@ void BatchnormCompute(const BatchNormParam &param) {
}
asm volatile(
"subs %[N], %[N], #1 \n\t"
"blt end_n_%= \n\t"
"loop_n_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"blt end_c_%= \n\t"
"loop_c_%=: \n\t"
"vld1.32 {q9}, [%[new_scale_ptr]]! \n\t"
"vld1.32 {q10}, [%[new_bias_ptr]]! \n\t"
"mov r6, %[HXW] \n\t"
"subs r6, r6, #32 \n\t"
"blt end_hw_%= \n\t"
"loop_hw_%=: \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"subs r6, r6, #32 \n\t"
"bge loop_hw_%= \n\t"
"end_hw_%=: \n\t"
"cmp r6, #0 \n\t"
"bge end_remainder_%= \n\t"
"mov r5, #4 \n\t"
"mul r6, r6, r5 \n\t"
"add %[input_x_ptr], %[input_x_ptr], r6 \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"add %[out_ptr], %[out_ptr], r6 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"end_remainder_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"bge loop_c_%= \n\t"
"end_c_%=: \n\t"
"subs %[N], %[N], #1 \n\t"
"bge loop_n_%= \n\t"
"end_n_%=: \n\t"
:
: [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
[new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
[N] "r"(N), [C] "r"(C), [HXW] "r"(HXW)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "r5", "r6");
"subs %[N], %[N], #1 \n\t"
"blt end_n_%= \n\t"
"loop_n_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"blt end_c_%= \n\t"
"loop_c_%=: \n\t"
"vld1.32 {q9}, [%[new_scale_ptr]]! \n\t"
"vld1.32 {q10}, [%[new_bias_ptr]]! \n\t"
"mov r6, %[HXW] \n\t"
"subs r6, r6, #32 \n\t"
"blt end_hw_%= \n\t"
"loop_hw_%=: \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"subs r6, r6, #32 \n\t"
"bge loop_hw_%= \n\t"
"end_hw_%=: \n\t"
"cmp r6, #0 \n\t"
"bge end_remainder_%= \n\t"
"mov r5, #4 \n\t"
"mul r6, r6, r5 \n\t"
"add %[input_x_ptr], %[input_x_ptr], r6 \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"add %[out_ptr], %[out_ptr], r6 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"end_remainder_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"bge loop_c_%= \n\t"
"end_c_%=: \n\t"
"subs %[N], %[N], #1 \n\t"
"bge loop_n_%= \n\t"
"end_n_%=: \n\t"
:
: [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
[new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
[N] "r"(N), [C] "r"(C), [HXW] "r"(HXW)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "r5", "r6");
delete[] inv_std_ptr;
delete[] new_scale_ptr;
......@@ -195,11 +195,12 @@ void BatchnormCompute(const BatchNormParam &param) {
float *inv_std_ptr = new float[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor new_scale;
auto new_scale_ptr = new_scale.mutable_data<float>(framework::make_ddim({C}));
auto new_scale_ptr =
new_scale.mutable_data<float>(framework::make_ddim({C}));
Tensor new_bias;
auto new_bias_ptr = new_bias.mutable_data<float>(framework::make_ddim({C}));
......@@ -208,7 +209,7 @@ void BatchnormCompute(const BatchNormParam &param) {
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] =
bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
{
for (int n = 0; n < N; n++) {
for (int h = 0; h < H; h++) {
......@@ -216,7 +217,7 @@ void BatchnormCompute(const BatchNormParam &param) {
for (int w = 0; w < W; w++) {
int index = tmp_index + w;
out_ptr[index] =
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
}
}
}
......@@ -227,7 +228,7 @@ void BatchnormCompute(const BatchNormParam &param) {
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template<typename P>
template <typename P>
void ConvAddReluCompute(const FusionConvAddReluParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
......@@ -49,10 +49,10 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
......@@ -62,14 +62,14 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
......@@ -110,7 +110,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template<typename P>
template <typename P>
void ConvCompute(const ConvParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
......@@ -46,7 +46,7 @@ void ConvCompute(const ConvParam &param) {
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
......@@ -58,14 +58,14 @@ void ConvCompute(const ConvParam &param) {
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
......@@ -106,7 +106,7 @@ void ConvCompute(const ConvParam &param) {
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册