提交 c08fbb01 编写于 作者: C chenjiaoAngel

fix format. test=develop

上级 e3b509fb
......@@ -75,10 +75,10 @@ void GroupNormCompute::Run() {
in_p += 16;
}
for (int i = 0; i < remain - 3; i += 4) {
float32x4_t in0 = vld1q_f32(in_p);
sum1 = vaddq_f32(sum1, in0);
summ1 = vmlaq_f32(summ1, in0, in0);
in_p += 4;
float32x4_t in0 = vld1q_f32(in_p);
sum1 = vaddq_f32(sum1, in0);
summ1 = vmlaq_f32(summ1, in0, in0);
in_p += 4;
}
float sum = 0.0;
float summ = 0.0;
......@@ -87,9 +87,9 @@ void GroupNormCompute::Run() {
summ0 = vaddq_f32(summ0, summ1);
summ2 = vaddq_f32(summ2, summ3);
for (int i = 0; i < remain % 4; i++) {
sum += *in_p;
summ += (*in_p) * (*in_p);
in_p++;
sum += *in_p;
summ += (*in_p) * (*in_p);
in_p++;
}
sum0 = vaddq_f32(sum0, sum2);
summ0 = vaddq_f32(summ0, summ2);
......@@ -125,37 +125,37 @@ void GroupNormCompute::Run() {
const float32x4_t vbias = vdupq_n_f32(bias_val);
const float32x4_t vmean = vdupq_n_f32(mean_val);
for (int k = 0; k < cnt; k++) {
float32x4_t in0 = vld1q_f32(in_p);
float32x4_t in1 = vld1q_f32(in_p + 4);
float32x4_t in2 = vld1q_f32(in_p + 8);
float32x4_t in3 = vld1q_f32(in_p + 12);
float32x4_t submean0 = vsubq_f32(in0, vmean);
float32x4_t submean1 = vsubq_f32(in1, vmean);
float32x4_t submean2 = vsubq_f32(in2, vmean);
float32x4_t submean3 = vsubq_f32(in3, vmean);
float32x4_t out0 = vmlaq_f32(vbias, submean0, vsstd);
float32x4_t out1 = vmlaq_f32(vbias, submean1, vsstd);
float32x4_t out2 = vmlaq_f32(vbias, submean2, vsstd);
float32x4_t out3 = vmlaq_f32(vbias, submean3, vsstd);
vst1q_f32(out_p, out0);
vst1q_f32(out_p + 4, out0);
vst1q_f32(out_p + 8, out0);
vst1q_f32(out_p + 12, out0);
in_p += 16;
out_p += 16;
// Process 16 floats (four 128-bit NEON q-registers) per iteration:
// out = (in - mean) * sstd + bias, computed lane-wise.
float32x4_t in0 = vld1q_f32(in_p);
float32x4_t in1 = vld1q_f32(in_p + 4);
float32x4_t in2 = vld1q_f32(in_p + 8);
float32x4_t in3 = vld1q_f32(in_p + 12);
float32x4_t submean0 = vsubq_f32(in0, vmean);
float32x4_t submean1 = vsubq_f32(in1, vmean);
float32x4_t submean2 = vsubq_f32(in2, vmean);
float32x4_t submean3 = vsubq_f32(in3, vmean);
// vmlaq_f32(a, b, c) = a + b * c, i.e. bias + (in - mean) * sstd.
float32x4_t out0 = vmlaq_f32(vbias, submean0, vsstd);
float32x4_t out1 = vmlaq_f32(vbias, submean1, vsstd);
float32x4_t out2 = vmlaq_f32(vbias, submean2, vsstd);
float32x4_t out3 = vmlaq_f32(vbias, submean3, vsstd);
// BUG FIX: the original stored out0 into all four output slots,
// discarding out1/out2/out3 and writing wrong values for output
// elements 4..15 of every 16-element chunk.
vst1q_f32(out_p, out0);
vst1q_f32(out_p + 4, out1);
vst1q_f32(out_p + 8, out2);
vst1q_f32(out_p + 12, out3);
in_p += 16;
out_p += 16;
}
for (int k = 0; k < remain - 3; k += 4) {
float32x4_t in0 = vld1q_f32(in_p);
in_p += 4;
float32x4_t submean0 = vsubq_f32(in0, vmean);
float32x4_t out0 = vmlaq_f32(vbias, submean0, vsstd);
vst1q_f32(out_p, out0);
out_p += 4;
float32x4_t in0 = vld1q_f32(in_p);
in_p += 4;
float32x4_t submean0 = vsubq_f32(in0, vmean);
float32x4_t out0 = vmlaq_f32(vbias, submean0, vsstd);
vst1q_f32(out_p, out0);
out_p += 4;
}
for (int k = 0; k < remain % 4; k++) {
*out_p = (*in_p - mean_val) * sstd_val + bias_val;
in_p++;
out_p++;
*out_p = (*in_p - mean_val) * sstd_val + bias_val;
in_p++;
out_p++;
}
}
}
......
......@@ -84,5 +84,4 @@ bool DeformableConvOpLite::InferShapeImpl() const {
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(deformconv2d,
paddle::lite::operators::DeformableConvOpLite);
REGISTER_LITE_OP(deformconv2d, paddle::lite::operators::DeformableConvOpLite);
......@@ -39,8 +39,10 @@ bool GroupNormOp::CheckShape() const {
CHECK_EQ(bias_dims.size(), 1UL) << "Input Bias must have 1 dimensions.";
CHECK_GT(param_.epsilon, 0.f) << "epsilon should be greater than 0.f";
CHECK_LT(param_.epsilon, 0.01f) << "epsilon should be less than 0.01f";
CHECK_EQ(param_.channels, x_dims[1]) << "Input channels must be equal input_shape[1]";
CHECK_EQ(param_.channels % param_.groups, 0) << "channels must be divide groups";
CHECK_EQ(param_.channels, x_dims[1])
<< "Input channels must be equal input_shape[1]";
CHECK_EQ(param_.channels % param_.groups, 0)
<< "channels must be divide groups";
return true;
}
......@@ -54,8 +56,7 @@ bool GroupNormOp::InferShapeImpl() const {
return true;
}
bool GroupNormOp::AttachImpl(const cpp::OpDesc& op_desc,
lite::Scope* scope) {
bool GroupNormOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
param_.x = scope->FindVar(op_desc.Input("X").front())->GetMutable<Tensor>();
param_.scale =
scope->FindVar(op_desc.Input("Scale").front())->GetMutable<Tensor>();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册