/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/place.h" DEFINE_double(acc, 1e-5, "Test accuracy threshold."); template void RandomVec(const int n, T* a, const T lower = static_cast(-20.f), const T upper = static_cast(20.f)) { static unsigned int seed = 100; std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); for (int i = 0; i < n; ++i) { a[i] = static_cast(uniform_dist(rng) * (upper - lower) + lower); } } template void ExpectEQ(const T* target, const T* refer, int n) { if (std::is_floating_point::value) { for (int i = 0; i < n; ++i) { EXPECT_NEAR(target[i], refer[i], FLAGS_acc); } } else { for (int i = 0; i < n; ++i) { EXPECT_EQ(target[i], refer[i]); } } } std::vector TestSizes() { std::vector s; for (int i = 1; i < 32; ++i) { s.push_back(i); } // test some large size s.push_back(100); s.push_back(1000); s.push_back(2000); return s; } namespace jit = paddle::operators::jit; using CPUPlace = paddle::platform::CPUPlace; template struct TestFuncWithRefer { void operator()(const typename KernelTuples::func_type tgt, Args... args) { LOG(FATAL) << "Should specify this function."; } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector> { void operator()(const typename jit::XYZNTuples::func_type tgt, const std::vector& x, const std::vector& y, const std::vector& zref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(zref.size(), x.size()); EXPECT_EQ(zref.size(), y.size()); const T* x_data = x.data(); const T* y_data = y.data(); const T* zref_data = zref.data(); const int d = zref.size(); std::vector ztgt(d); T* ztgt_data = ztgt.data(); // test normal tgt(x_data, y_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); // test inplace x std::copy(x.begin(), x.end(), ztgt.begin()); tgt(ztgt_data, y_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); // test inplace y std::copy(y.begin(), y.end(), ztgt.begin()); tgt(x_data, ztgt_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); } }; template struct TestFuncWithRefer, T, std::vector, std::vector> { void operator()(const typename jit::AXYNTuples::func_type tgt, const T a, const std::vector& x, const std::vector& yref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); const T* x_data = x.data(); const T* yref_data = yref.data(); const int d = yref.size(); std::vector ytgt(d); T* ytgt_data = ytgt.data(); // test normal tgt(&a, x_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(&a, ytgt_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, int, int> { void operator()(const typename jit::SoftmaxTuples::func_type tgt, const std::vector& x, const std::vector& yref, int n, int bs) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); EXPECT_EQ(x.size(), static_cast(n * bs)); const T* x_data = x.data(); const T* yref_data = yref.data(); std::vector ytgt(n * bs); T* ytgt_data = ytgt.data(); // test normal tgt(x_data, ytgt_data, n, bs); ExpectEQ(ytgt_data, yref_data, n * bs); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(ytgt_data, ytgt_data, n, bs); ExpectEQ(ytgt_data, yref_data, n * bs); } }; template struct TestFuncWithRefer, std::vector, T> { void operator()(const typename jit::XRNTuples::func_type tgt, const std::vector& x, const T ref_res) { EXPECT_TRUE(tgt != nullptr); T tgt_res; tgt(x.data(), &tgt_res, x.size()); ExpectEQ(&tgt_res, &ref_res, 1); } }; template struct TestFuncWithRefer, std::vector, std::vector> { void operator()(const typename jit::XYNTuples::func_type tgt, const std::vector& x, const std::vector& yref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); const T* x_data = x.data(); const T* yref_data = yref.data(); const int d = yref.size(); std::vector ytgt(d); T* ytgt_data = ytgt.data(); // test normal tgt(x_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(ytgt_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, std::vector, std::vector, typename jit::LSTMTuples::attr_type> { void operator()(const typename jit::LSTMTuples::func_type tgt, const std::vector& xsrc, const std::vector& wp, const std::vector& ct_1, const std::vector& ct_ref, const std::vector& ht_ref, const typename jit::LSTMTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(ct_ref.size(), ht_ref.size()); EXPECT_EQ(ct_1.size(), ht_ref.size()); EXPECT_EQ(xsrc.size(), 4 * ht_ref.size()); EXPECT_EQ(wp.size(), 3 * ht_ref.size()); // x could be changed after compute, so copy to save src int d = ht_ref.size(); std::vector x(xsrc.size()), ct(ct_ref.size()), ht(ht_ref.size()); std::vector checked(2 * d); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ct_1_data = ct_1.data(); const T* wp_data = wp.data(); const T* ct_ref_data = ct_ref.data(); const T* ht_ref_data = ht_ref.data(); T* x_data = x.data(); T* ct_data = ct.data(); T* ht_data = ht.data(); T* checked_data = checked.data(); jit::lstm_t step; step.gates = x_data; step.ct_1 = ct_1_data; step.ct = ct_data; step.ht = ht_data; if (attr.use_peephole) { step.wp = wp_data; step.checked = checked_data; } tgt(&step, &attr); ExpectEQ(ct_data, ct_ref_data, d); ExpectEQ(ht_data, ht_ref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, typename jit::GRUTuples::attr_type> { void operator()(const typename jit::GRUTuples::func_type tgt, const std::vector& xsrc, const std::vector& ht_1, const std::vector& ht_ref, const typename jit::GRUTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(ht_1.size(), ht_ref.size()); EXPECT_EQ(xsrc.size(), 3 * ht_ref.size()); // x could be changed after compute, so copy to save src int d = ht_ref.size(); std::vector x(xsrc.size()), ht(ht_ref.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ht_1_data = ht_1.data(); const T* ht_ref_data = ht_ref.data(); T* x_data = x.data(); T* ht_data = ht.data(); jit::gru_t step; step.gates = x_data; step.ht_1 = ht_1_data; step.ht = ht_data; tgt(&step, &attr); ExpectEQ(ht_data, ht_ref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, typename jit::SeqPoolTuples::attr_type> { void operator()(const typename jit::SeqPoolTuples::func_type tgt, const std::vector& x, const std::vector& yref, const typename jit::SeqPoolTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(x.size() % yref.size(), 0); int w = yref.size(); std::vector y(w); const T* x_data = x.data(); const T* yref_data = yref.data(); T* y_data = y.data(); tgt(x_data, y_data, &attr); ExpectEQ(y_data, yref_data, w); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, typename jit::MatMulTuples::attr_type> { void operator()(const typename jit::MatMulTuples::func_type tgt, const std::vector& a, const std::vector& b, const std::vector& cref, const typename jit::MatMulTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(a.size(), static_cast(attr.m * attr.k)); EXPECT_EQ(b.size(), static_cast(attr.k * attr.n)); EXPECT_EQ(cref.size(), static_cast(attr.m * attr.n)); std::vector c(cref.size()); const T* a_data = a.data(); const T* b_data = b.data(); const T* cref_data = cref.data(); T* c_data = c.data(); tgt(a_data, b_data, c_data, &attr); ExpectEQ(c_data, cref_data, attr.m * attr.n); } }; template void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { TestFuncWithRefer test; // test jitcode auto jitcode = jit::GetJitCode(attr); if (jitcode) { VLOG(10) << "Test Jitcode Kernel "; test(jitcode, args...); } // test all impls in more jit::KernelKey kkey(KT, PlaceType()); auto& pool = jit::KernelPool().Instance().AllKernels(); auto iter = pool.find(kkey); if (iter != pool.end()) { auto& impls = iter->second; for (auto& impl : impls) { auto i = dynamic_cast*>(impl.get()); if (i && i->UseMe(attr)) { auto more = i->GetFunc(); VLOG(10) << "Test More Kernel : " << i->ImplType(); test(more, args...); } } } // test result from Get function // VLOG(10) << "Test Get function "; auto tgt = jit::Get(attr); test(tgt, args...); } template void TestXYZNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d), y(d), zref(d); RandomVec(d, x.data()); RandomVec(d, y.data()); std::vector xinp(d), yinp(d); // inplace test std::copy(x.begin(), x.end(), xinp.begin()); std::copy(y.begin(), y.end(), yinp.begin()); const T* x_data = x.data(); const T* y_data = y.data(); T* zref_data = zref.data(); T* xinp_data = xinp.data(); T* yinp_data = yinp.data(); // test refer code inplace ref(x_data, y_data, zref_data, d); ref(x_data, yinp_data, yinp_data, d); ref(xinp_data, y_data, xinp_data, d); ExpectEQ(xinp_data, zref_data, d); ExpectEQ(yinp_data, zref_data, d); TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(d, x, y, zref); } } template void TestAXYNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); const T a = static_cast(3); std::vector x(d), yref(d); std::vector xinp(d); // inplace test RandomVec(d, x.data()); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); T* yref_data = yref.data(); T* xinp_data = xinp.data(); // test refer code inplace ref(&a, x_data, yref_data, d); ref(&a, xinp_data, xinp_data, d); ExpectEQ(xinp_data, yref_data, d); TestAllImpls, PlaceType, T, std::vector, std::vector>(d, a, x, yref); } } template void TestXRNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; FLAGS_acc = 1e-4; for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d); RandomVec(d, x.data(), -2.f, 2.f); T ref_res; ref(x.data(), &ref_res, d); TestAllImpls, PlaceType, std::vector, T>(d, x, ref_res); } FLAGS_acc = last_acc; } template void TestXYNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d), yref(d); std::vector xinp(d); // inplace test RandomVec(d, x.data(), -2.f, 2.f); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); T* yref_data = yref.data(); T* xinp_data = xinp.data(); // test refer code inplace ref(x_data, yref_data, d); ref(xinp_data, xinp_data, d); ExpectEQ(xinp_data, yref_data, d); TestAllImpls, PlaceType, std::vector, std::vector>(d, x, yref); } } template void TestLSTMKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; for (int d : TestSizes()) { for (bool use_peephole : {true, false}) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { for (auto& act_cell : all_acts) { const jit::lstm_attr_t attr( d, jit::to_kerneltype(act_gate), jit::to_kerneltype(act_cand), jit::to_kerneltype(act_cell), use_peephole); auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector xsrc(4 * d), wp(3 * d), ct_1(d); std::vector ct_ref(d), ht_ref(d), checked(2 * d); RandomVec(4 * d, xsrc.data(), -2.f, 2.f); RandomVec(3 * d, wp.data(), -1.f, 1.f); RandomVec(d, ct_1.data(), -1.f, 1.f); // x could be changed after compute, so copy to save src std::vector x(xsrc.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ct_1_data = ct_1.data(); const T* wp_data = wp.data(); T* x_data = x.data(); T* checked_data = checked.data(); T* ct_ref_data = ct_ref.data(); T* ht_ref_data = ht_ref.data(); jit::lstm_t step; step.gates = x_data; step.ct_1 = ct_1_data; step.ct = ct_ref_data; step.ht = ht_ref_data; if (use_peephole) { step.wp = wp_data; step.checked = checked_data; } ref(&step, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector, std::vector, std::vector, std::vector>(attr, xsrc, wp, ct_1, ct_ref, ht_ref, attr); } } } } } } template void TestGRUKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; for (int d : TestSizes()) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate), jit::to_kerneltype(act_cand)); auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector xsrc(3 * d), ht_1(d), ht_ref(d); RandomVec(3 * d, xsrc.data(), -2.f, 2.f); RandomVec(d, ht_1.data(), -2.f, 2.f); // x could be changed after compute, so copy to save src std::vector x(xsrc.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ht_1_data = ht_1.data(); T* x_data = x.data(); T* ht_ref_data = ht_ref.data(); jit::gru_t step; step.gates = x_data; step.ht_1 = ht_1_data; step.ht = ht_ref_data; ref(&step, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(attr, xsrc, ht_1, ht_ref, attr); } } } } template void TestSeqPoolKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector pool_types = { jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt}; for (auto type : pool_types) { for (int w : TestSizes()) { jit::seq_pool_attr_t attr(w, type); for (int h : TestSizes()) { attr.h = h; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(h * w), yref(w); RandomVec(h * w, x.data(), -2.f, 2.f); const T* x_data = x.data(); T* yref_data = yref.data(); ref(x_data, yref_data, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector>(attr, x, yref, attr); } } } } template void TestMatMulKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; // TODO(intel): fix MKL acc issue // https://github.com/PaddlePaddle/Paddle/issues/15447 FLAGS_acc = 1e-3; for (int m : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) { for (int k : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector a(m * k), b(k * n), c(m * n); RandomVec(m * k, a.data(), -2.f, 2.f); RandomVec(k * n, b.data(), -2.f, 2.f); const T* a_data = a.data(); const T* b_data = b.data(); T* c_data = c.data(); const jit::matmul_attr_t attr{m, n, k}; ref(a_data, b_data, c_data, &attr); TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(attr, a, b, c, attr); } } } FLAGS_acc = last_acc; } template void TestSoftmaxKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(bs * n), y(bs * n); RandomVec(bs * n, x.data(), -2.f, 2.f); const T* x_data = x.data(); T* y_data = y.data(); std::vector xinp(x.size()); // inplace test std::copy(x.begin(), x.end(), xinp.begin()); ref(x_data, y_data, n, bs); T* xinp_data = xinp.data(); ref(xinp_data, xinp_data, n, bs); ExpectEQ(xinp_data, y_data, n * bs); TestAllImpls, PlaceType, std::vector, std::vector>(n, x, y, n, bs); } } } template void TestNCHW16CMulNCKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const int n = 3, c = 16 * 4, h = 10, w = 10; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); int sz = n * c * h * w; std::vector x(sz), y(n * c), zref(sz); std::vector ztgt(sz), zjit(sz); RandomVec(sz, x.data(), -2.f, 2.f); RandomVec(n * c, y.data(), -2.f, 2.f); const T* x_data = x.data(); const T* y_data = y.data(); T* zref_data = zref.data(); T* ztgt_data = ztgt.data(); T* zjit_data = zjit.data(); constexpr int simd_width = ZMM_FLOAT_BLOCK; int C = c / simd_width; auto tgt = jit::Get, PlaceType>(0); auto jitcode = jit::GetJitCode, PlaceType>(0); EXPECT_TRUE(tgt != nullptr); if (std::is_same::value && paddle::platform::MayIUse(paddle::platform::avx512f)) { EXPECT_TRUE(jitcode != nullptr); } for (int ni = 0; ni < n; ni++) { for (int ci = 0; ci < C; ci++) { auto ptr_x = x_data + ni * C * h * w * simd_width + ci * h * w * simd_width; auto ptr_y = y_data + ni * C * simd_width + ci * simd_width; auto ptr_zref = zref_data + ni * C * h * w * simd_width + ci * h * w * simd_width; auto ptr_ztgt = ztgt_data + ni * C * h * w * simd_width + ci * h * w * simd_width; ref(ptr_x, ptr_y, ptr_zref, h, w); tgt(ptr_x, ptr_y, ptr_ztgt, h, w); if (jitcode) { auto ptr_zjit = zjit_data + ni * C * h * w * simd_width + ci * h * w * simd_width; jitcode(ptr_x, ptr_y, ptr_zjit, h, w); } } } ExpectEQ(ztgt_data, zref_data, sz); if (jitcode) { ExpectEQ(zjit_data, zref_data, sz); } } // XYZNTuple TEST(JITKernel, kVMul) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVAdd) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVAddRelu) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVSub) { TestXYZNKernel(); TestXYZNKernel(); } // AXYNTuples TEST(JITKernel, kVScal) { TestAXYNKernel(); TestAXYNKernel(); } TEST(JITKernel, kVAddBias) { TestAXYNKernel(); TestAXYNKernel(); } // XRNTuples TEST(JITKernel, kHMax) { TestXRNKernel(); TestXRNKernel(); } TEST(JITKernel, kHSum) { TestXRNKernel(); TestXRNKernel(); } // XYNTuples TEST(JITKernel, kVRelu) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVIdentity) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVSquare) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVExp) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVSigmoid) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVTanh) { TestXYNKernel(); TestXYNKernel(); } // LSTM TEST(JITKernel, kLSTMCtHt) { TestLSTMKernel(); TestLSTMKernel(); } TEST(JITKernel, kLSTMC1H1) { TestLSTMKernel(); TestLSTMKernel(); } // GRU TEST(JITKernel, kGRUH1) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kGRUHtPart1) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kGRUHtPart2) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kSeqPool) { TestSeqPoolKernel(); TestSeqPoolKernel(); } TEST(JITKernel, kMatMul) { TestMatMulKernel(); TestMatMulKernel(); } TEST(JITKernel, kSoftmax) { TestSoftmaxKernel(); TestSoftmaxKernel(); } TEST(JITKernel, kNCHW16CMulNC) { TestNCHW16CMulNCKernel(); TestNCHW16CMulNCKernel(); } // TODO(yihua/TJ): add crf decoding and layer norm unit tests TEST(JITKernel, pool) { // TODO(TJ): add some test }