/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include #include #include #include #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/place.h" DEFINE_double(acc, 1e-5, "Test accuracy threshold."); template void RandomVec(const int n, T* a, const T lower = static_cast(-20.f), const T upper = static_cast(20.f)) { static unsigned int seed = 100; std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); for (int i = 0; i < n; ++i) { a[i] = static_cast(uniform_dist(rng) * (upper - lower) + lower); } } template void ExpectEQ(const T* target, const T* refer, size_t n) { if (std::is_floating_point::value) { for (size_t i = 0; i < n; ++i) { EXPECT_NEAR(target[i], refer[i], FLAGS_acc); } } else { for (size_t i = 0; i < n; ++i) { EXPECT_EQ(target[i], refer[i]); } } } std::vector TestSizes() { std::vector s; for (int i = 1; i < 32; ++i) { s.push_back(i); } // test some large size s.push_back(100); s.push_back(1000); s.push_back(2000); return s; } namespace jit = paddle::operators::jit; using CPUPlace = paddle::platform::CPUPlace; template struct TestFuncWithRefer { void operator()(const typename KernelTuples::func_type tgt, Args... args) { LOG(FATAL) << "Should specify this function."; } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector> { void operator()(const typename jit::XYZNTuples::func_type tgt, const std::vector& x, const std::vector& y, const std::vector& zref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(zref.size(), x.size()); EXPECT_EQ(zref.size(), y.size()); const T* x_data = x.data(); const T* y_data = y.data(); const T* zref_data = zref.data(); const int d = zref.size(); std::vector ztgt(d); T* ztgt_data = ztgt.data(); // test normal tgt(x_data, y_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); // test inplace x std::copy(x.begin(), x.end(), ztgt.begin()); tgt(ztgt_data, y_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); // test inplace y std::copy(y.begin(), y.end(), ztgt.begin()); tgt(x_data, ztgt_data, ztgt_data, d); ExpectEQ(ztgt_data, zref_data, d); } }; template struct TestFuncWithRefer, T, std::vector, std::vector> { void operator()(const typename jit::AXYNTuples::func_type tgt, const T a, const std::vector& x, const std::vector& yref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); const T* x_data = x.data(); const T* yref_data = yref.data(); const int d = yref.size(); std::vector ytgt(d); T* ytgt_data = ytgt.data(); // test normal tgt(&a, x_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(&a, ytgt_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, int, int> { void operator()(const typename jit::SoftmaxTuples::func_type tgt, const std::vector& x, const std::vector& yref, int n, int bs) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); EXPECT_EQ(x.size(), static_cast(n * bs)); const T* x_data = x.data(); const T* yref_data = yref.data(); std::vector ytgt(n * bs); T* ytgt_data = ytgt.data(); // test normal tgt(x_data, ytgt_data, n, bs); ExpectEQ(ytgt_data, yref_data, n * bs); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(ytgt_data, ytgt_data, n, bs); ExpectEQ(ytgt_data, yref_data, n * bs); } }; template struct TestFuncWithRefer, std::vector, T> { void operator()(const typename jit::XRNTuples::func_type tgt, const std::vector& x, const T ref_res) { EXPECT_TRUE(tgt != nullptr); T tgt_res; tgt(x.data(), &tgt_res, x.size()); ExpectEQ(&tgt_res, &ref_res, 1); } }; template struct TestFuncWithRefer, std::vector, std::vector> { void operator()(const typename jit::XYNTuples::func_type tgt, const std::vector& x, const std::vector& yref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); const T* x_data = x.data(); const T* yref_data = yref.data(); const int d = yref.size(); std::vector ytgt(d); T* ytgt_data = ytgt.data(); // test normal tgt(x_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); tgt(ytgt_data, ytgt_data, d); ExpectEQ(ytgt_data, yref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, std::vector, std::vector, typename jit::LSTMTuples::attr_type> { void operator()(const typename jit::LSTMTuples::func_type tgt, const std::vector& xsrc, const std::vector& wp, const std::vector& ct_1, const std::vector& ct_ref, const std::vector& ht_ref, const typename jit::LSTMTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(ct_ref.size(), ht_ref.size()); EXPECT_EQ(ct_1.size(), ht_ref.size()); EXPECT_EQ(xsrc.size(), 4 * ht_ref.size()); EXPECT_EQ(wp.size(), 3 * ht_ref.size()); // x could be changed after compute, so copy to save src int d = ht_ref.size(); std::vector x(xsrc.size()), ct(ct_ref.size()), ht(ht_ref.size()); std::vector checked(2 * d); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ct_1_data = ct_1.data(); const T* wp_data = wp.data(); const T* ct_ref_data = ct_ref.data(); const T* ht_ref_data = ht_ref.data(); T* x_data = x.data(); T* ct_data = ct.data(); T* ht_data = ht.data(); T* checked_data = checked.data(); jit::lstm_t step; step.gates = x_data; step.ct_1 = ct_1_data; step.ct = ct_data; step.ht = ht_data; if (attr.use_peephole) { step.wp = wp_data; step.checked = checked_data; } tgt(&step, &attr); ExpectEQ(ct_data, ct_ref_data, d); ExpectEQ(ht_data, ht_ref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, typename jit::GRUTuples::attr_type> { void operator()(const typename jit::GRUTuples::func_type tgt, const std::vector& xsrc, const std::vector& ht_1, const std::vector& ht_ref, const typename jit::GRUTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(ht_1.size(), ht_ref.size()); EXPECT_EQ(xsrc.size(), 3 * ht_ref.size()); // x could be changed after compute, so copy to save src int d = ht_ref.size(); std::vector x(xsrc.size()), ht(ht_ref.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ht_1_data = ht_1.data(); const T* ht_ref_data = ht_ref.data(); T* x_data = x.data(); T* ht_data = ht.data(); jit::gru_t step; step.gates = x_data; step.ht_1 = ht_1_data; step.ht = ht_data; tgt(&step, &attr); ExpectEQ(ht_data, ht_ref_data, d); } }; template struct TestFuncWithRefer, std::vector, std::vector, typename jit::SeqPoolTuples::attr_type> { void operator()(const typename jit::SeqPoolTuples::func_type tgt, const std::vector& x, const std::vector& yref, const typename jit::SeqPoolTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(x.size() % yref.size(), static_cast(0)); int w = yref.size(); std::vector y(w); const T* x_data = x.data(); const T* yref_data = yref.data(); T* y_data = y.data(); tgt(x_data, y_data, &attr); ExpectEQ(y_data, yref_data, w); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, typename jit::EmbSeqPoolTuples::attr_type> { void operator()(const typename jit::EmbSeqPoolTuples::func_type tgt, const std::vector& table, const std::vector& idx, const std::vector& oref, const typename jit::EmbSeqPoolTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(table.size(), static_cast(attr.table_height * attr.table_width)); EXPECT_EQ(idx.size(), static_cast(attr.index_height * attr.index_width)); EXPECT_EQ(oref.size(), static_cast(attr.table_width * attr.index_width)); const T* table_data = table.data(); const int64_t* idx_data = idx.data(); const T* oref_data = oref.data(); int o_w = oref.size(); std::vector out(o_w); T* o_data = out.data(); tgt(table_data, idx_data, o_data, &attr); ExpectEQ(o_data, oref_data, o_w); } }; template struct TestFuncWithRefer, T, std::vector, std::vector, std::vector, std::vector, typename jit::SgdTuples::attr_type> { void operator()(const typename jit::SgdTuples::func_type tgt, const T lr, const std::vector& param, const std::vector& grad, const std::vector& rows, const std::vector& oref, const typename jit::SgdTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(param.size(), static_cast(attr.param_height * attr.param_width)); EXPECT_EQ(grad.size(), static_cast(attr.grad_height * attr.grad_width)); EXPECT_EQ(rows.size(), static_cast(attr.selected_rows_size)); EXPECT_EQ(param.size(), oref.size()); const T* param_data = param.data(); const T* grad_data = grad.data(); const int64_t* rows_data = rows.data(); const T* oref_data = oref.data(); std::vector out(oref.size()); T* o_data = out.data(); tgt(&lr, param_data, grad_data, rows_data, o_data, &attr); // only the selected rows should be equal for (size_t i = 0; i < rows.size(); ++i) { ExpectEQ(o_data + rows[i] * attr.grad_width, oref_data + rows[i] * attr.grad_width, attr.grad_width); } // inplace std::copy(param.begin(), param.end(), out.begin()); tgt(&lr, o_data, grad_data, rows_data, o_data, &attr); for (size_t i = 0; i < rows.size(); ++i) { ExpectEQ(o_data + rows[i] * attr.grad_width, oref_data + rows[i] * attr.grad_width, attr.grad_width); } } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, typename jit::MatMulTuples::attr_type> { void operator()(const typename jit::MatMulTuples::func_type tgt, const std::vector& a, const std::vector& b, const std::vector& cref, const typename jit::MatMulTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(a.size(), static_cast(attr.m * attr.k)); EXPECT_EQ(b.size(), static_cast(attr.k * attr.n)); EXPECT_EQ(cref.size(), static_cast(attr.m * attr.n)); std::vector c(cref.size()); const T* a_data = a.data(); const T* b_data = b.data(); const T* cref_data = cref.data(); T* c_data = c.data(); tgt(a_data, b_data, c_data, &attr); ExpectEQ(c_data, cref_data, attr.m * attr.n); } }; template struct TestFuncWithRefer, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, int, float, int> { void operator()(const typename jit::LayerNormTuples::func_type tgt, std::vector& x, std::vector& outref, // NOLINT std::vector& mean, std::vector& var, // NOLINT const std::vector& scale, const std::vector& bias, int left, const float epsilon, int right) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(x.size(), static_cast(left * right)); EXPECT_EQ(outref.size(), static_cast(left * right)); EXPECT_EQ(mean.size(), static_cast(left)); EXPECT_EQ(var.size(), static_cast(left)); EXPECT_EQ(scale.size(), static_cast(right)); EXPECT_EQ(bias.size(), static_cast(right)); std::vector outtgt(outref.size()); const T* scale_data = scale.data(); const T* bias_data = bias.data(); T* x_data = x.data(); T* mean_data = mean.data(); T* var_data = var.data(); T* outref_data = outref.data(); T* outtgt_data = outtgt.data(); tgt(x_data, outtgt_data, mean_data, var_data, scale_data, bias_data, left, epsilon, right); ExpectEQ(outtgt_data, outref_data, left * right); } }; template struct TestFuncWithRefer, int, std::vector, std::vector, std::vector, std::vector, int> { void operator()(const typename jit::CRFDecodingTuples::func_type tgt, const int seq_len, const std::vector& x, const std::vector& w, std::vector& alpharef, // NOLINT std::vector& trackref, int tag_num) { // NOLINT constexpr int state_trans_base_idx = 2; EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(x.size(), static_cast(seq_len * tag_num)); EXPECT_EQ(w.size(), static_cast((tag_num + state_trans_base_idx) * tag_num)); EXPECT_EQ(alpharef.size(), static_cast(seq_len * tag_num)); EXPECT_EQ(trackref.size(), static_cast(seq_len * tag_num)); std::vector alphatgt(alpharef.size()); std::vector tracktgt(trackref.size()); memcpy(trackref.data(), tracktgt.data(), tag_num * sizeof(int)); tgt(seq_len, (const T*)x.data(), (const T*)w.data(), alphatgt.data(), tracktgt.data(), tag_num); ExpectEQ(alpharef.data(), alphatgt.data(), seq_len * tag_num); ExpectEQ(trackref.data(), tracktgt.data(), seq_len * tag_num); } }; template void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { TestFuncWithRefer test; // test jitcode auto jitcode = jit::GetJitCode(attr); if (jitcode) { VLOG(10) << "Test Jitcode Kernel "; test(jitcode, args...); } // test all impls in more jit::KernelKey kkey(KT, PlaceType()); auto& pool = jit::KernelPool().Instance().AllKernels(); auto iter = pool.find(kkey); if (iter != pool.end()) { auto& impls = iter->second; for (auto& impl : impls) { auto i = dynamic_cast*>(impl.get()); if (i && i->UseMe(attr)) { auto more = i->GetFunc(); VLOG(10) << "Test More Kernel : " << i->ImplType(); test(more, args...); } } } // test result from Get function // VLOG(10) << "Test Get function "; auto tgt = jit::Get(attr); test(tgt, args...); } template void TestXYZNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d), y(d), zref(d); RandomVec(d, x.data()); RandomVec(d, y.data()); std::vector xinp(d), yinp(d); // inplace test std::copy(x.begin(), x.end(), xinp.begin()); std::copy(y.begin(), y.end(), yinp.begin()); const T* x_data = x.data(); const T* y_data = y.data(); T* zref_data = zref.data(); T* xinp_data = xinp.data(); T* yinp_data = yinp.data(); // test refer code inplace ref(x_data, y_data, zref_data, d); ref(x_data, yinp_data, yinp_data, d); ref(xinp_data, y_data, xinp_data, d); ExpectEQ(xinp_data, zref_data, d); ExpectEQ(yinp_data, zref_data, d); TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(d, x, y, zref); } } template void TestAXYNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); const T a = static_cast(3); std::vector x(d), yref(d); std::vector xinp(d); // inplace test RandomVec(d, x.data()); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); T* yref_data = yref.data(); T* xinp_data = xinp.data(); // test refer code inplace ref(&a, x_data, yref_data, d); ref(&a, xinp_data, xinp_data, d); ExpectEQ(xinp_data, yref_data, d); TestAllImpls, PlaceType, T, std::vector, std::vector>(d, a, x, yref); } } template void TestXRNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; FLAGS_acc = 1e-4; for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d); RandomVec(d, x.data(), -2.f, 2.f); T ref_res; ref(x.data(), &ref_res, d); TestAllImpls, PlaceType, std::vector, T>(d, x, ref_res); } FLAGS_acc = last_acc; } template void TestXYNKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d), yref(d); std::vector xinp(d); // inplace test RandomVec(d, x.data(), -2.f, 2.f); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); T* yref_data = yref.data(); T* xinp_data = xinp.data(); // test refer code inplace ref(x_data, yref_data, d); ref(xinp_data, xinp_data, d); ExpectEQ(xinp_data, yref_data, d); TestAllImpls, PlaceType, std::vector, std::vector>(d, x, yref); } } template void TestLSTMKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; for (int d : TestSizes()) { for (bool use_peephole : {true, false}) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { for (auto& act_cell : all_acts) { const jit::lstm_attr_t attr( d, jit::to_kerneltype(act_gate), jit::to_kerneltype(act_cand), jit::to_kerneltype(act_cell), use_peephole); auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector xsrc(4 * d), wp(3 * d), ct_1(d); std::vector ct_ref(d), ht_ref(d), checked(2 * d); RandomVec(4 * d, xsrc.data(), -2.f, 2.f); RandomVec(3 * d, wp.data(), -1.f, 1.f); RandomVec(d, ct_1.data(), -1.f, 1.f); // x could be changed after compute, so copy to save src std::vector x(xsrc.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ct_1_data = ct_1.data(); const T* wp_data = wp.data(); T* x_data = x.data(); T* checked_data = checked.data(); T* ct_ref_data = ct_ref.data(); T* ht_ref_data = ht_ref.data(); jit::lstm_t step; step.gates = x_data; step.ct_1 = ct_1_data; step.ct = ct_ref_data; step.ht = ht_ref_data; if (use_peephole) { step.wp = wp_data; step.checked = checked_data; } ref(&step, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector, std::vector, std::vector, std::vector>(attr, xsrc, wp, ct_1, ct_ref, ht_ref, attr); } } } } } } template void TestGRUKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; for (int d : TestSizes()) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate), jit::to_kerneltype(act_cand)); auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector xsrc(3 * d), ht_1(d), ht_ref(d); RandomVec(3 * d, xsrc.data(), -2.f, 2.f); RandomVec(d, ht_1.data(), -2.f, 2.f); // x could be changed after compute, so copy to save src std::vector x(xsrc.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); const T* ht_1_data = ht_1.data(); T* x_data = x.data(); T* ht_ref_data = ht_ref.data(); jit::gru_t step; step.gates = x_data; step.ht_1 = ht_1_data; step.ht = ht_ref_data; ref(&step, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(attr, xsrc, ht_1, ht_ref, attr); } } } } template void TestSeqPoolKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector pool_types = { jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt}; for (auto type : pool_types) { for (int w : TestSizes()) { jit::seq_pool_attr_t attr(w, type); for (int h : TestSizes()) { attr.h = h; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(h * w), yref(w); RandomVec(h * w, x.data(), -2.f, 2.f); const T* x_data = x.data(); T* yref_data = yref.data(); ref(x_data, yref_data, &attr); VLOG(10) << attr; TestAllImpls, PlaceType, std::vector, std::vector>(attr, x, yref, attr); } } } } template void TestMatMulKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; // TODO(intel): fix MKL acc issue // https://github.com/PaddlePaddle/Paddle/issues/15447 FLAGS_acc = 1e-3; for (int m : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) { for (int k : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector a(m * k), b(k * n), c(m * n); RandomVec(m * k, a.data(), -2.f, 2.f); RandomVec(k * n, b.data(), -2.f, 2.f); const T* a_data = a.data(); const T* b_data = b.data(); T* c_data = c.data(); const jit::matmul_attr_t attr{m, n, k}; ref(a_data, b_data, c_data, &attr); TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(attr, a, b, c, attr); } } } FLAGS_acc = last_acc; } template void TestSoftmaxKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(bs * n), y(bs * n); RandomVec(bs * n, x.data(), -2.f, 2.f); const T* x_data = x.data(); T* y_data = y.data(); std::vector xinp(x.size()); // inplace test std::copy(x.begin(), x.end(), xinp.begin()); ref(x_data, y_data, n, bs); T* xinp_data = xinp.data(); ref(xinp_data, xinp_data, n, bs); ExpectEQ(xinp_data, y_data, n * bs); TestAllImpls, PlaceType, std::vector, std::vector>(n, x, y, n, bs); } } } template void TestEmbSeqPoolKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); int64_t tbl_h = 1e4; std::vector pool_types = { jit::SeqPoolType::kSum}; // only support sum yet for (int tbl_w : TestSizes()) { std::vector table(tbl_h * tbl_w); RandomVec(tbl_h * tbl_w, table.data(), -2.f, 2.f); const T* table_data = table.data(); for (auto type : pool_types) { for (int idx_w : {1, 2, 10, 16}) { for (int idx_h : {1, 2, 9, 13, 16}) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector idx(idx_h * idx_w); RandomVec(idx_h * idx_w, idx.data(), 0, tbl_h - 1); int64_t out_w = tbl_w * idx_w; std::vector oref(out_w); const int64_t* idx_data = idx.data(); T* o_data = oref.data(); jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, type); ref(table_data, idx_data, o_data, &attr); TestAllImpls, PlaceType, std::vector, std::vector, std::vector>(attr, table, idx, oref, attr); } } } } } template void TestSgdKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const T lr = 0.1; auto UnDuplicatedRandomVec = [](int n, const int64_t lower, const int64_t upper) -> std::vector { PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1); PADDLE_ENFORCE_GT(n, 0); std::vector all, out; for (int i = 0; i < n; ++i) { all.push_back(i); } std::random_shuffle(all.begin(), all.end()); out.insert(out.begin(), all.begin(), all.begin() + n); return out; }; for (int param_h : {1, 10}) { for (int grad_w : TestSizes()) { std::vector param(param_h * grad_w); std::vector param_out(param_h * grad_w); RandomVec(param_h * grad_w, param.data(), -2.f, 2.f); const T* param_data = param.data(); T* out_data = param_out.data(); for (int rows_size = 1; rows_size <= param_h; ++rows_size) { std::vector grad(rows_size * grad_w); std::vector rows = UnDuplicatedRandomVec(rows_size, 0, rows_size - 1); RandomVec(rows_size * grad_w, grad.data(), -2.f, 2.f); const int64_t* rows_data = rows.data(); const T* grad_data = grad.data(); auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size); ref(&lr, param_data, grad_data, rows_data, out_data, &attr); // inplace test std::vector inp(param.size()); std::copy(param.begin(), param.end(), inp.begin()); T* inp_data = inp.data(); ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr); // only the selected rows should be equal for (int i = 0; i < rows_size; ++i) { ExpectEQ(inp_data + rows[i] * grad_w, out_data + rows[i] * grad_w, grad_w); } TestAllImpls, PlaceType, T, std::vector, std::vector, std::vector, std::vector>( attr, lr, param, grad, rows, param_out, attr); } } } } template void TestNCHW16CMulNCKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const int n = 3, c = 16 * 4, h = 10, w = 10; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); int sz = n * c * h * w; std::vector x(sz), y(n * c), zref(sz); std::vector ztgt(sz), zjit(sz); RandomVec(sz, x.data(), -2.f, 2.f); RandomVec(n * c, y.data(), -2.f, 2.f); const T* x_data = x.data(); const T* y_data = y.data(); T* zref_data = zref.data(); T* ztgt_data = ztgt.data(); T* zjit_data = zjit.data(); constexpr int simd_width = ZMM_FLOAT_BLOCK; int C = c / simd_width; auto tgt = jit::Get, PlaceType>(0); auto jitcode = jit::GetJitCode, PlaceType>(0); EXPECT_TRUE(tgt != nullptr); if (std::is_same::value && paddle::platform::MayIUse(paddle::platform::avx512f)) { EXPECT_TRUE(jitcode != nullptr); } for (int ni = 0; ni < n; ni++) { for (int ci = 0; ci < C; ci++) { auto ptr_x = x_data + ni * C * h * w * simd_width + ci * h * w * simd_width; auto ptr_y = y_data + ni * C * simd_width + ci * simd_width; auto ptr_zref = zref_data + ni * C * h * w * simd_width + ci * h * w * simd_width; auto ptr_ztgt = ztgt_data + ni * C * h * w * simd_width + ci * h * w * simd_width; ref(ptr_x, ptr_y, ptr_zref, h, w); tgt(ptr_x, ptr_y, ptr_ztgt, h, w); if (jitcode) { auto ptr_zjit = zjit_data + ni * C * h * w * simd_width + ci * h * w * simd_width; jitcode(ptr_x, ptr_y, ptr_zjit, h, w); } } } ExpectEQ(ztgt_data, zref_data, sz); if (jitcode) { ExpectEQ(zjit_data, zref_data, sz); } } template void TestLayerNormKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const T epsilon = 9.99999975e-06; for (int n : {1, 2, 10}) { for (int x_dim_0 : {1, 9, 17, 50}) { int left = n * x_dim_0; for (int x_dim_1 : TestSizes()) { int right = x_dim_1; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); int sz = left * right; std::vector x(sz), mean(left), var(left), scale(right), bias(right), outref(sz); RandomVec(sz, x.data(), -2.f, 2.f); RandomVec(left, mean.data(), -2.f, 2.f); RandomVec(left, var.data(), -2.f, 2.f); RandomVec(right, scale.data(), -2.f, 2.f); RandomVec(right, bias.data(), -2.f, 2.f); const T* scale_data = scale.data(); const T* bias_data = bias.data(); T* x_data = x.data(); T* mean_data = mean.data(); T* var_data = var.data(); T* outref_data = outref.data(); ref(x_data, outref_data, mean_data, var_data, scale_data, bias_data, left, epsilon, right); TestAllImpls, PlaceType, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, int, float>( right, x, outref, mean, var, scale, bias, left, epsilon, right); } } } } template void TestCRFDecodingKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); constexpr int state_trans_base_idx = 2; for (int seq_len : {1, 11, 17, 50}) { for (int tag_num : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); int x_sz = seq_len * tag_num; int w_sz = (tag_num + state_trans_base_idx) * tag_num; std::vector x(x_sz), w(w_sz), alpharef(x_sz); std::vector trackref(x_sz); RandomVec(x_sz, x.data(), -2.f, 2.f); RandomVec(w_sz, w.data(), -2.f, 2.f); ref(seq_len, (const T*)x.data(), (const T*)w.data(), alpharef.data(), trackref.data(), tag_num); TestAllImpls, PlaceType, int, std::vector, std::vector, std::vector, std::vector, int>(tag_num, seq_len, x, w, alpharef, trackref, tag_num); } } } // XYZNTuple TEST(JITKernel, kVMul) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVAdd) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVAddRelu) { TestXYZNKernel(); TestXYZNKernel(); } TEST(JITKernel, kVSub) { TestXYZNKernel(); TestXYZNKernel(); } // AXYNTuples TEST(JITKernel, kVScal) { TestAXYNKernel(); TestAXYNKernel(); } TEST(JITKernel, kVAddBias) { TestAXYNKernel(); TestAXYNKernel(); } // XRNTuples TEST(JITKernel, kHMax) { TestXRNKernel(); TestXRNKernel(); } TEST(JITKernel, kHSum) { TestXRNKernel(); TestXRNKernel(); } // XYNTuples TEST(JITKernel, kVRelu) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVIdentity) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVSquare) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVExp) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVSigmoid) { TestXYNKernel(); TestXYNKernel(); } TEST(JITKernel, kVTanh) { TestXYNKernel(); TestXYNKernel(); } // LSTM TEST(JITKernel, kLSTMCtHt) { TestLSTMKernel(); TestLSTMKernel(); } TEST(JITKernel, kLSTMC1H1) { TestLSTMKernel(); TestLSTMKernel(); } // GRU TEST(JITKernel, kGRUH1) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kGRUHtPart1) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kGRUHtPart2) { TestGRUKernel(); TestGRUKernel(); } TEST(JITKernel, kSeqPool) { TestSeqPoolKernel(); TestSeqPoolKernel(); } TEST(JITKernel, kMatMul) { TestMatMulKernel(); TestMatMulKernel(); } TEST(JITKernel, kSoftmax) { TestSoftmaxKernel(); TestSoftmaxKernel(); } TEST(JITKernel, kEmbSeqPool) { TestEmbSeqPoolKernel(); TestEmbSeqPoolKernel(); } TEST(JITKernel, kSgd) { TestSgdKernel(); TestSgdKernel(); } TEST(JITKernel, kNCHW16CMulNC) { TestNCHW16CMulNCKernel(); TestNCHW16CMulNCKernel(); } TEST(JITKernel, kLayerNorm) { TestLayerNormKernel(); TestLayerNormKernel(); } TEST(JITKernel, kCRFDecoding) { TestCRFDecodingKernel(); TestCRFDecodingKernel(); } TEST(JITKernel, pool) { // TODO(TJ): add some test }