提交 4403de09 编写于 作者: C chenjiaoAngel

fix format, test=develop

上级 13a95dc6
......@@ -164,16 +164,16 @@ void transpose(const Dtype* din, Dtype* dout, int m, int n) {
Dtype* dout_ptr1 = dout_ptr0 + n;
Dtype* dout_ptr2 = dout_ptr1 + n;
Dtype* dout_ptr3 = dout_ptr2 + n;
float32x4x2_t tmp0 = vtrnq_f32(din0, din1); // a00 b00 a02 b02
//float32x4_t tmp1 = vtrn2q_f32(din0, din1); // a01 b01 a03 b03
float32x4x2_t tmp2 = vtrnq_f32(din2, din3); // c00 d00 c02 d02
// float32x4_t tmp3 = vtrn2q_f32(din2, din3); // c01 d01 c03 d03
// a00 b00 a02 b02 a01 b01 a03 b03
float32x4x2_t tmp0 = vtrnq_f32(din0, din1);
// c00 d00 c02 d02 c01 d01 c03 d03
float32x4x2_t tmp2 = vtrnq_f32(din2, din3);
din_ptr0 += 4;
din_ptr1 += 4;
float32x4x2_t tmp00 = vtrnq_f32(tmp0.val[0], tmp2.val[0]); // a00 b00 c00 d00
// float32x4_t tmp01 = vtrn2q_f32(tmp0, tmp2); // a02 b02 c02 d02
float32x4x2_t tmp02 = vtrnq_f32(tmp0.val[1], tmp2.val[1]); // a01 b01 c01 d01
// float32x4_t tmp03 = vtrn2q_f32(tmp1, tmp3); // a03 b03 c03 d03
// a00 b00 c00 d00 a02 b02 c02 d02
float32x4x2_t tmp00 = vtrnq_f32(tmp0.val[0], tmp2.val[0]);
// a01 b01 c01 d01 a03 b03 c03 d03
float32x4x2_t tmp02 = vtrnq_f32(tmp0.val[1], tmp2.val[1]);
din_ptr2 += 4;
din_ptr3 += 4;
vst1q_f32(dout_ptr0, tmp00.val[0]);
......@@ -193,7 +193,7 @@ void transpose(const Dtype* din, Dtype* dout, int m, int n) {
for (int x = 0; x < remain_n; x++) {
Dtype* dout_ptr0 = dout + x * 4;
for (int y = 0; y < cnt_m; y++) {
float32x4_t din0 = vld1q_f32(din_ptr0); // a00 a01 a02 a03
float32x4_t din0 = vld1q_f32(din_ptr0);
dout_ptr0 += nn_num;
Dtype* dout_ptr1 = dout_ptr0 + n;
Dtype* dout_ptr2 = dout_ptr1 + n;
......
......@@ -17,8 +17,8 @@ limitations under the License. */
#include <cstddef>
#include <string>
#include <vector>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/backends/arm/math/conv_block_utils.h"
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/backends/arm/math/sgemm.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
......@@ -108,8 +108,7 @@ void SequenceConvCompute::Run() {
kernel_size * hidden_dim,
input_row_end - input_row_begin);
#else
paddle::lite::arm::math::transpose(
tmp_data,
paddle::lite::arm::math::transpose(tmp_data,
sub_col_data,
kernel_size * hidden_dim,
input_row_end - input_row_begin);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册