From a19b3225a1da8c31fc996bace3ac09e6f5f177ef Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sat, 17 Nov 2018 14:56:43 +0000 Subject: [PATCH] fix jitcode small size test=develop --- paddle/fluid/operators/math/jit_code.cc | 12 ++++++++---- paddle/fluid/operators/math/jit_kernel_test.cc | 10 +++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/operators/math/jit_code.cc b/paddle/fluid/operators/math/jit_code.cc index a080079a2d..e484e9a3c7 100644 --- a/paddle/fluid/operators/math/jit_code.cc +++ b/paddle/fluid/operators/math/jit_code.cc @@ -59,9 +59,10 @@ void VXXJitCode::generate() { offset += sizeof(float) * YMM_FLOAT_BLOCK; } int rest = num_ % YMM_FLOAT_BLOCK; - int block = XMM_FLOAT_BLOCK; while (rest > 0) { + int block = XMM_FLOAT_BLOCK; if (rest >= 4) { + block = 4; if (scalar_index_ != 1) { vmovups(xmm_src1, ptr[param1 + offset]); } @@ -69,6 +70,7 @@ void VXXJitCode::generate() { vmovups(xmm_src2, ptr[param2 + offset]); } } else if (rest >= 2) { + block = 2; if (scalar_index_ != 1) { vmovq(xmm_src1, ptr[param1 + offset]); } @@ -76,6 +78,7 @@ void VXXJitCode::generate() { vmovq(xmm_src2, ptr[param2 + offset]); } } else { + block = 1; if (scalar_index_ != 1) { vmovss(xmm_src1, ptr[param1 + offset]); } @@ -105,7 +108,6 @@ void VXXJitCode::generate() { } offset += sizeof(float) * block; rest -= block; - block /= 2; } ret(); } @@ -167,13 +169,16 @@ void VActJitCode::generate() { offset += sizeof(float) * YMM_FLOAT_BLOCK; } int rest = num_ % YMM_FLOAT_BLOCK; - int block = XMM_FLOAT_BLOCK; while (rest > 0) { + int block = XMM_FLOAT_BLOCK; if (rest >= 4) { + block = 4; vmovups(xmm_src, ptr[param1 + offset]); } else if (rest >= 2) { + block = 2; vmovq(xmm_src, ptr[param1 + offset]); } else { + block = 1; vmovss(xmm_src, ptr[param1 + offset]); } switch (type_) { @@ -201,7 +206,6 @@ void VActJitCode::generate() { } offset += sizeof(float) * block; rest -= block; - block /= 2; } ret(); } diff --git a/paddle/fluid/operators/math/jit_kernel_test.cc b/paddle/fluid/operators/math/jit_kernel_test.cc index 932fa4c000..b6c62a2634 100644 --- a/paddle/fluid/operators/math/jit_kernel_test.cc +++ b/paddle/fluid/operators/math/jit_kernel_test.cc @@ -69,7 +69,7 @@ void vrelu_intri8(const int n, const float* x, float* y) { TEST(JitKernel, vrelu) { namespace jit = paddle::operators::math::jitkernel; - for (int d : {7, 8, 15, 16, 30, 256, 512}) { + for (int d : {3, 7, 8, 15, 16, 30, 256, 512}) { std::vector x(d); std::vector zref(d), ztgt(d); RandomVec(d, x.data(), -10.f, 1.f); @@ -159,7 +159,7 @@ void vexp_mkl(const int n, const float* x, float* y) { TEST(JitKernel, vexp) { namespace jit = paddle::operators::math::jitkernel; - for (int d : {7, 8, 12, 15, 16, 20, 30, 128, 256}) { + for (int d : {1, 3, 4, 6, 7, 8, 12, 15, 16, 20, 30, 128, 256}) { std::vector x(d); std::vector zref(d), ztgt(d); RandomVec(d, x.data(), -2.f, 2.f); @@ -234,7 +234,7 @@ void vsigmoid_better( TEST(JitKernel, vsigmoid) { namespace jit = paddle::operators::math::jitkernel; - for (int d : {7, 8, 15, 16, 30, 32, 64, 100, 128, 256}) { + for (int d : {1, 3, 4, 6, 7, 8, 15, 16, 30, 32, 64, 100, 128, 256}) { std::vector x(d); std::vector zref(d), ztgt(d); RandomVec(d, x.data(), -2.f, 2.f); @@ -298,7 +298,7 @@ void vtanh_better( TEST(JitKernel, vtanh) { namespace jit = paddle::operators::math::jitkernel; - for (int d : {7, 8, 15, 16, 30, 32, 64, 100, 128, 256}) { + for (int d : {1, 2, 3, 4, 5, 6, 7, 8, 15, 16, 30, 32, 64, 100, 128, 256}) { std::vector x(d); std::vector zref(d), ztgt(d); RandomVec(d, x.data(), -2.f, 2.f); @@ -389,7 +389,7 @@ void lstm_ctht_better( TEST(JitKernel, lstm) { namespace jit = paddle::operators::math::jitkernel; - for (int d : {7, 8, 15, 16, 30, 32, 64, 100}) { + for (int d : {1, 2, 3, 4, 5, 6, 7, 8, 15, 16, 30, 32, 64, 100}) { int d4 = d * 4; int d3 = d * 3; std::vector x(d4), xref(d4); -- GitLab