From 8bc6381546e2073da7fe18ad7cbca7dcba6dbf42 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 26 Feb 2019 08:42:45 +0000 Subject: [PATCH] fix jitcodekey and refine test test=develop --- paddle/fluid/operators/jit/kernel_key.cc | 27 ++- paddle/fluid/operators/jit/test.cc | 244 +++++++++-------------- 2 files changed, 113 insertions(+), 158 deletions(-) diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc index c5e659f5766..740d0f850a0 100644 --- a/paddle/fluid/operators/jit/kernel_key.cc +++ b/paddle/fluid/operators/jit/kernel_key.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/kernel_key.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace operators { @@ -23,14 +24,30 @@ size_t JitCodeKey(const int& d) { return d; } +// TODO(TJ): refine and benchmark JitCodeKey generatation constexpr int act_type_shift = 3; // suppot 2^3 act types +static inline int act_type_convert(KernelType type) { + if (type == kVIdentity) { + return 0; + } else if (type == kVExp) { + return 1; + } else if (type == kVRelu) { + return 2; + } else if (type == kVSigmoid) { + return 3; + } else if (type == kVTanh) { + return 4; + } + PADDLE_THROW("Unsupported act type %d", type); + return 0; +} template <> size_t JitCodeKey(const lstm_attr_t& attr) { size_t key = attr.d; - int gate_key = static_cast(attr.act_gate) << 1; - int cand_key = static_cast(attr.act_cand) << (1 + act_type_shift); - int cell_key = static_cast(attr.act_cell) << (1 + act_type_shift * 2); + int gate_key = act_type_convert(attr.act_gate) << 1; + int cand_key = act_type_convert(attr.act_cand) << (1 + act_type_shift); + int cell_key = act_type_convert(attr.act_cell) << (1 + act_type_shift * 2); return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key + attr.use_peephole; } @@ -38,8 +55,8 @@ size_t JitCodeKey(const lstm_attr_t& attr) { template <> size_t JitCodeKey(const gru_attr_t& attr) { size_t key = attr.d; - return (key << (act_type_shift * 2)) + static_cast(attr.act_gate) + - (static_cast(attr.act_cand) << act_type_shift); + return (key << (act_type_shift * 2)) + act_type_convert(attr.act_gate) + + (act_type_convert(attr.act_cand) << act_type_shift); } template <> diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index e4335e76d5e..b618cd6a84b 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -40,11 +40,11 @@ template void ExpectEQ(const T* target, const T* refer, size_t n) { if (std::is_floating_point::value) { for (size_t i = 0; i < n; ++i) { - EXPECT_NEAR(target[i], refer[i], FLAGS_acc); + EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i; } } else { for (size_t i = 0; i < n; ++i) { - EXPECT_EQ(target[i], refer[i]); + EXPECT_EQ(target[i], refer[i]) << " at index : " << i; } } } @@ -447,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { } template -void TestXYZNKernel() { +void TestKernelXYZNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -480,7 +480,7 @@ void TestXYZNKernel() { } template -void TestAXYNKernel() { +void TestKernelAXYNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -506,7 +506,7 @@ void TestAXYNKernel() { } template -void TestXRNKernel() { +void TestKernelXRNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; FLAGS_acc = 1e-4; @@ -524,7 +524,7 @@ void TestXRNKernel() { } template -void TestXYNKernel() { +void TestKernelXYNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -549,10 +549,12 @@ void TestXYNKernel() { } template -void TestLSTMKernel() { +void TestKernelLSTMTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; - for (int d : TestSizes()) { + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int d : test_sizes) { for (bool use_peephole : {true, false}) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { @@ -599,10 +601,12 @@ void TestLSTMKernel() { } template -void TestGRUKernel() { +void TestKernelGRUTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; - for (int d : TestSizes()) { + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int d : test_sizes) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate), @@ -633,14 +637,16 @@ void TestGRUKernel() { } template -void TestSeqPoolKernel() { +void TestKernelSeqPoolTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector pool_types = { jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt}; + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); for (auto type : pool_types) { - for (int w : TestSizes()) { + for (int w : test_sizes) { jit::seq_pool_attr_t attr(w, type); - for (int h : TestSizes()) { + for (int h : test_sizes) { attr.h = h; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); @@ -658,11 +664,11 @@ void TestSeqPoolKernel() { } template -void TestMatMulKernel() { +void TestKernelMatMulTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; - // TODO(intel): fix MKL acc issue - // https://github.com/PaddlePaddle/Paddle/issues/15447 + // export MKL_CBWR=AVX would make MKL force to use AVX + // export KMP_DETERMINISTIC_REDUCTION=yes would make the result deterministic FLAGS_acc = 1e-3; for (int m : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) { @@ -686,7 +692,7 @@ void TestMatMulKernel() { } template -void TestSoftmaxKernel() { +void TestKernelSoftmaxTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { @@ -711,12 +717,14 @@ void TestSoftmaxKernel() { } template -void TestEmbSeqPoolKernel() { +void TestKernelEmbSeqPoolTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); int64_t tbl_h = 1e4; std::vector pool_types = { jit::SeqPoolType::kSum}; // only support sum yet - for (int tbl_w : TestSizes()) { + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int tbl_w : test_sizes) { std::vector table(tbl_h * tbl_w); RandomVec(tbl_h * tbl_w, table.data(), -2.f, 2.f); const T* table_data = table.data(); @@ -745,7 +753,7 @@ void TestEmbSeqPoolKernel() { } template -void TestSgdKernel() { +void TestKernelSgdTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const T lr = 0.1; auto UnDuplicatedRandomVec = [](int n, const int64_t lower, @@ -799,7 +807,7 @@ void TestSgdKernel() { } template -void TestNCHW16CMulNCKernel() { +void TestKernelNCHW16CMulNCTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const int n = 3, c = 16 * 4, h = 10, w = 10; auto ref = jit::GetRefer>(); @@ -852,7 +860,7 @@ void TestNCHW16CMulNCKernel() { } template -void TestLayerNormKernel() { +void TestKernelLayerNormTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const T epsilon = 9.99999975e-06; for (int n : {1, 2, 10}) { @@ -891,11 +899,13 @@ void TestLayerNormKernel() { } template -void TestCRFDecodingKernel() { +void TestKernelCRFDecodingTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); constexpr int state_trans_base_idx = 2; + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); for (int seq_len : {1, 11, 17, 50}) { - for (int tag_num : TestSizes()) { + for (int tag_num : test_sizes) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); int x_sz = seq_len * tag_num; @@ -916,148 +926,76 @@ void TestCRFDecodingKernel() { } } -// XYZNTuple -TEST(JITKernel, kVMul) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVAdd) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVAddRelu) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVSub) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -// AXYNTuples -TEST(JITKernel, kVScal) { - TestAXYNKernel(); - TestAXYNKernel(); -} - -TEST(JITKernel, kVAddBias) { - TestAXYNKernel(); - TestAXYNKernel(); -} - -// XRNTuples -TEST(JITKernel, kHMax) { - TestXRNKernel(); - TestXRNKernel(); -} - -TEST(JITKernel, kHSum) { - TestXRNKernel(); - TestXRNKernel(); -} - -// XYNTuples -TEST(JITKernel, kVRelu) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVIdentity) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVSquare) { - TestXYNKernel(); - TestXYNKernel(); -} +#define TEST_CPU_KERNEL(test_tuple, kernel_type) \ + TEST(JITKernel, kernel_type) { \ + TestKernel##test_tuple(); \ + TestKernel##test_tuple(); \ + } -TEST(JITKernel, kVExp) { - TestXYNKernel(); - TestXYNKernel(); -} +TEST_CPU_KERNEL(XYZNTuples, kVMul); +TEST_CPU_KERNEL(XYZNTuples, kVAdd); +TEST_CPU_KERNEL(XYZNTuples, kVAddRelu); +TEST_CPU_KERNEL(XYZNTuples, kVSub); -TEST(JITKernel, kVSigmoid) { - TestXYNKernel(); - TestXYNKernel(); -} +TEST_CPU_KERNEL(AXYNTuples, kVScal); +TEST_CPU_KERNEL(AXYNTuples, kVAddBias); -TEST(JITKernel, kVTanh) { - TestXYNKernel(); - TestXYNKernel(); -} +TEST_CPU_KERNEL(XRNTuples, kHMax); +TEST_CPU_KERNEL(XRNTuples, kHSum); -// LSTM -TEST(JITKernel, kLSTMCtHt) { - TestLSTMKernel(); - TestLSTMKernel(); -} +TEST_CPU_KERNEL(XYNTuples, kVRelu); +TEST_CPU_KERNEL(XYNTuples, kVIdentity); +TEST_CPU_KERNEL(XYNTuples, kVSquare); +TEST_CPU_KERNEL(XYNTuples, kVExp); +TEST_CPU_KERNEL(XYNTuples, kVSigmoid); +TEST_CPU_KERNEL(XYNTuples, kVTanh); -TEST(JITKernel, kLSTMC1H1) { - TestLSTMKernel(); - TestLSTMKernel(); -} +TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt); +TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1); -// GRU -TEST(JITKernel, kGRUH1) { - TestGRUKernel(); - TestGRUKernel(); -} +TEST_CPU_KERNEL(GRUTuples, kGRUH1); +TEST_CPU_KERNEL(GRUTuples, kGRUHtPart1); +TEST_CPU_KERNEL(GRUTuples, kGRUHtPart2); -TEST(JITKernel, kGRUHtPart1) { - TestGRUKernel(); - TestGRUKernel(); -} +TEST_CPU_KERNEL(NCHW16CMulNCTuples, kNCHW16CMulNC); -TEST(JITKernel, kGRUHtPart2) { - TestGRUKernel(); - TestGRUKernel(); -} +TEST_CPU_KERNEL(SeqPoolTuples, kSeqPool); +TEST_CPU_KERNEL(MatMulTuples, kMatMul); +TEST_CPU_KERNEL(SoftmaxTuples, kSoftmax); +TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool); +TEST_CPU_KERNEL(SgdTuples, kSgd); +TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm); +TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding); -TEST(JITKernel, kSeqPool) { - TestSeqPoolKernel(); - TestSeqPoolKernel(); -} - -TEST(JITKernel, kMatMul) { - TestMatMulKernel(); - TestMatMulKernel(); -} - -TEST(JITKernel, kSoftmax) { - TestSoftmaxKernel(); - TestSoftmaxKernel(); -} +TEST(JITKernel_key, lstm) { + jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr2(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh); -TEST(JITKernel, kEmbSeqPool) { - TestEmbSeqPoolKernel(); - TestEmbSeqPoolKernel(); -} + auto key1 = jit::JitCodeKey(attr1); + auto key2 = jit::JitCodeKey(attr2); + auto key3 = jit::JitCodeKey(attr3); + auto key4 = jit::JitCodeKey(attr4); -TEST(JITKernel, kSgd) { - TestSgdKernel(); - TestSgdKernel(); + EXPECT_TRUE(key1 != key2); + EXPECT_TRUE(key2 == key3); + EXPECT_TRUE(key3 != key4); } -TEST(JITKernel, kNCHW16CMulNC) { - TestNCHW16CMulNCKernel(); - TestNCHW16CMulNCKernel(); -} +TEST(JITKernel_key, gru) { + jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr2(9, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity); -TEST(JITKernel, kLayerNorm) { - TestLayerNormKernel(); - TestLayerNormKernel(); -} - -TEST(JITKernel, kCRFDecoding) { - TestCRFDecodingKernel(); - TestCRFDecodingKernel(); -} + auto key1 = jit::JitCodeKey(attr1); + auto key2 = jit::JitCodeKey(attr2); + auto key3 = jit::JitCodeKey(attr3); + auto key4 = jit::JitCodeKey(attr4); -TEST(JITKernel, pool) { - // TODO(TJ): add some test + EXPECT_TRUE(key1 != key2); + EXPECT_TRUE(key2 == key3); + EXPECT_TRUE(key3 != key4); } +// TODO(TJ): add more test about key and pool -- GitLab