diff --git a/paddle/fluid/operators/jit/README.md b/paddle/fluid/operators/jit/README.md index c2e32cc49b2934c7010b077b97f67cfa21866975..2d72aa4d569aa717b16f5d29f2df28fe3c66a719 100644 --- a/paddle/fluid/operators/jit/README.md +++ b/paddle/fluid/operators/jit/README.md @@ -37,10 +37,12 @@ PaddlePaddle/Paddle/paddle/fluid/ ## 测试 - 逻辑测试 - 所有实现都要与refer的code对比,需要满足精度要求 + 所有实现都要与refer的code对比,需要满足精度要求, 包括float和double的数据类型 - 性能测试 + 所有实现的性能对比,并且与最终的`jit::Get`方法对比,该方法拿到的性能需要是最好的。 # 如何添加新的算子 -- 在`KernelType` 中添加 `your_key` -- 实现Reference 的逻辑,每个jitkernel的Reference 实现是必须的。不要依赖任何第三方库。并在`refer/CmakeLists.txt`中`USE_JITKERNEL_REFER(your_key)` +- 在`KernelType` 中添加 `your_key` . +- 实现Reference 的逻辑,每个jitkernel的Reference 实现是必须的。不要依赖任何第三方库。并在`refer/CmakeLists.txt`中`USE_JITKERNEL_REFER(your_key)`. +- 必要时可以添加新的`KernelTuples`,可以参考`XYZNTuples`. diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 27a1ba7ba32f26e0fb6da083e5e8214748750974..2ad87e414bd3d8cd522c8d64ecd5fcf9371a0a63 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -53,9 +53,9 @@ std::vector TestSizes() { // return this function avg time template -double BenchTartgetFunc(const typename KernelTuples::func_type tgt, - const std::vector& x, const std::vector& y, - std::vector& z) { // NOLINT +double BenchXYZNFunc(const typename KernelTuples::func_type tgt, + const std::vector& x, const std::vector& y, + std::vector& z) { // NOLINT const T* x_data = x.data(); const T* y_data = y.data(); const int d = z.size(); @@ -83,14 +83,14 @@ void BenchXYZNKernel() { // refer auto refer = jit::GetRefer>(); if (refer) { - auto res = BenchTartgetFunc>(refer, x, y, z); + auto res = BenchXYZNFunc>(refer, x, y, z); infos.push_back(std::make_pair("Refer", res)); } // test jitcode auto jitcode = jit::GetJitCode, PlaceType>(d); if (jitcode) { - auto res = BenchTartgetFunc>(jitcode, x, y, z); + auto res = BenchXYZNFunc>(jitcode, x, y, z); infos.push_back(std::make_pair("JitCode", res)); } @@ -105,7 +105,7 @@ void BenchXYZNKernel() { impl.get()); if (i && i->UseMe(d)) { auto more = i->GetFunc(); - auto res = BenchTartgetFunc>(more, x, y, z); + auto res = BenchXYZNFunc>(more, x, y, z); infos.push_back(std::make_pair("More", res)); } } @@ -116,7 +116,7 @@ void BenchXYZNKernel() { if (!tgt) { LOG(ERROR) << "Target can not be empty!"; } - auto res = BenchTartgetFunc>(tgt, x, y, z); + auto res = BenchXYZNFunc>(tgt, x, y, z); infos.push_back(std::make_pair("Target", res)); // print @@ -129,6 +129,78 @@ void BenchXYZNKernel() { } } +// return this function avg time +template +double BenchAXYNFunc(const typename KernelTuples::func_type tgt, const T a, + const std::vector& x, + std::vector& y) { // NOLINT + const T* x_data = x.data(); + T* y_data = y.data(); + const int d = y.size(); + for (int i = 0; i < FLAGS_burning; ++i) { + tgt(&a, x_data, y_data, d); + } + auto start = GetCurrentUS(); + for (int i = 0; i < FLAGS_repeat; ++i) { + tgt(&a, x_data, y_data, d); + } + auto end = GetCurrentUS(); + return (end - start) / FLAGS_repeat; +} + +template +void BenchAXYNKernel() { + namespace jit = paddle::operators::jit; + for (int d : TestSizes()) { + std::vector> infos; + const T a = static_cast(3); + std::vector x(d), y(d); + RandomVec(d, x.data()); + // test refer + auto refer = jit::GetRefer>(); + if (refer) { + auto res = BenchAXYNFunc>(refer, a, x, y); + infos.push_back(std::make_pair("Refer", res)); + } + // test jitcode + auto jitcode = jit::GetJitCode, PlaceType>(d); + if (jitcode) { + auto res = BenchAXYNFunc>(jitcode, a, x, y); + infos.push_back(std::make_pair("JitCode", res)); + } + // test all impls in more + jit::KernelKey kkey(KT, PlaceType()); + auto& pool = jit::KernelPool().Instance().AllKernels(); + auto iter = pool.find(kkey); + if (iter != pool.end()) { + auto& impls = iter->second; + for (auto& impl : impls) { + auto i = dynamic_cast>*>( + impl.get()); + if (i && i->UseMe(d)) { + auto more = i->GetFunc(); + auto res = BenchAXYNFunc>(more, a, x, y); + infos.push_back(std::make_pair("More", res)); + } + } + } + // Test result from Get function + auto tgt = jit::Get, PlaceType>(d); + if (!tgt) { + LOG(ERROR) << "Target can not be empty!"; + } + auto res = BenchAXYNFunc>(tgt, a, x, y); + infos.push_back(std::make_pair("Target", res)); + // print + std::ostringstream loginfos; + loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": "; + for (auto pair : infos) { + loginfos << pair.first << " takes " << pair.second << " us; "; + } + LOG(INFO) << loginfos.str(); + } +} + // Benchmark all jit kernels including jitcode, mkl and refer. // To use this tool, run command: ./benchmark [options...] // Options: @@ -147,4 +219,7 @@ int main(int argc, char* argv[]) { BenchXYZNKernel(); BenchXYZNKernel(); BenchXYZNKernel(); + + BenchAXYNKernel(); + BenchAXYNKernel(); } diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index 2260f0aed42ff836e31837c8f7e87e037a7ef939..c9aaffb8b8d81e3cf7b5b3366d244d363af84247 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/helper.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace operators { @@ -32,7 +33,10 @@ const char* to_string(KernelType kt) { return "vscal"; case vexp: return "vexp"; + case vaddbias: + return "vaddbias"; default: + PADDLE_THROW("Not support type: %d", kt); return "NOT JITKernel"; } return nullptr; diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index b2e9d639776b82bb07b9a3c6d1553b116e88a7ec..74ecf3dade5d70c2c8a1b8732f98448719ee85a7 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -19,7 +19,15 @@ namespace paddle { namespace operators { namespace jit { -typedef enum { vmul = 0, vadd = 1, vaddrelu, vsub, vscal, vexp } KernelType; +typedef enum { + vmul = 0, + vadd = 1, + vaddrelu, + vsub, + vscal, + vaddbias, + vexp +} KernelType; template struct XYZNTuples { @@ -28,6 +36,9 @@ struct XYZNTuples { typedef void (*func_type)(const T*, const T*, T*, int); }; +template +struct AXYNTuples : public XYZNTuples {}; + // Just for adding to kernel pool without template class Kernel { public: diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index b6ff80d03dfc41ce6259b7563f37ca1855db0f81..afe3f6ca0f48987fa478844b029e4c442f92ac93 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -8,3 +8,8 @@ endfunction() # use refer kernel by name USE_JITKERNEL_REFER(vmul) +USE_JITKERNEL_REFER(vadd) +USE_JITKERNEL_REFER(vaddrelu) +USE_JITKERNEL_REFER(vsub) +USE_JITKERNEL_REFER(vscal) +USE_JITKERNEL_REFER(vaddbias) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index 69d039422f32fecf7e7a38631e39cfe3dfb7a2e5..4e9c530344b8f624a192a8068418e80357d8174a 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -26,4 +26,7 @@ REGISTER_REFER_KERNEL(vadd, VAdd); REGISTER_REFER_KERNEL(vaddrelu, VAddRelu); REGISTER_REFER_KERNEL(vsub, VSub); +REGISTER_REFER_KERNEL(vscal, VScal); +REGISTER_REFER_KERNEL(vaddbias, VAddBias); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 4d4d308cbd169fb9ab9b5ffe87a16e0d4d1b41ae..32ac5bf2d78210ee82bf814ce7a32ffa2bcacf91 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -59,6 +59,13 @@ void VScal(const T* a, const T* x, T* y, int n) { } } +template +void VAddBias(const T* a, const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = a[0] + x[i]; + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -66,11 +73,16 @@ void VScal(const T* a, const T* x, T* y, int n) { name##Kernel() { this->func = name; } \ } +// const T* x, const T* y, T* z, int n DECLARE_REFER_KERNEL(VMul, XYZNTuples); DECLARE_REFER_KERNEL(VAdd, XYZNTuples); DECLARE_REFER_KERNEL(VAddRelu, XYZNTuples); DECLARE_REFER_KERNEL(VSub, XYZNTuples); +// const T* a, const T* x, T* y, int n +DECLARE_REFER_KERNEL(VScal, AXYNTuples); +DECLARE_REFER_KERNEL(VAddBias, AXYNTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 9ceca24079f7ab8dc3e7f5afb7d9bee84c9e954d..ea2cb7b7a42575a92f79c3f3507a165705b3b910 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -12,7 +12,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include // for memcpy #include #include #include @@ -59,9 +58,9 @@ std::vector TestSizes() { } template -void TestTartgetFunc(const typename KernelTuples::func_type tgt, - const std::vector& x, const std::vector& y, - const std::vector& zref) { +void TestXYZNFunc(const typename KernelTuples::func_type tgt, + const std::vector& x, const std::vector& y, + const std::vector& zref) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(zref.size(), x.size()); EXPECT_EQ(zref.size(), y.size()); @@ -88,9 +87,8 @@ void TestTartgetFunc(const typename KernelTuples::func_type tgt, template void TestXYZNKernel() { namespace jit = paddle::operators::jit; + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { - VLOG(10) << "===== Test JITKernel " << jit::to_string(KT) - << ", size: " << d; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); @@ -119,7 +117,7 @@ void TestXYZNKernel() { auto jitcode = jit::GetJitCode, PlaceType>(d); if (jitcode) { VLOG(10) << "Test Jitcode Kernel, size: " << d; - TestTartgetFunc>(jitcode, x, y, zref); + TestXYZNFunc>(jitcode, x, y, zref); } // test all impls in more @@ -134,14 +132,14 @@ void TestXYZNKernel() { if (i && i->UseMe(d)) { auto more = i->GetFunc(); VLOG(10) << "Test More Kernel, size: " << d; - TestTartgetFunc>(more, x, y, zref); + TestXYZNFunc>(more, x, y, zref); } } } // Test result from Get function VLOG(10) << "Test Get function, size: " << d; auto tgt = jit::Get, PlaceType>(d); - TestTartgetFunc>(tgt, x, y, zref); + TestXYZNFunc>(tgt, x, y, zref); } } @@ -169,4 +167,89 @@ TEST(JITKernel, vsub) { TestXYZNKernel(); } -TEST(JITKernel, pool) {} +template +void TestAXYNFunc(const typename KernelTuples::func_type tgt, const T a, + const std::vector& x, const std::vector& yref) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(yref.size(), x.size()); + const T* x_data = x.data(); + const T* yref_data = yref.data(); + const int d = yref.size(); + std::vector ytgt(d); + T* ytgt_data = ytgt.data(); + // test normal + tgt(&a, x_data, ytgt_data, d); + ExpectEQ(ytgt_data, yref_data, d); + // test inplace x + std::copy(x.begin(), x.end(), ytgt.begin()); + tgt(&a, ytgt_data, ytgt_data, d); + ExpectEQ(ytgt_data, yref_data, d); +} + +template +void TestAXYNKernel() { + namespace jit = paddle::operators::jit; + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + for (int d : TestSizes()) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + + const T a = static_cast(3); + std::vector x(d), yref(d); + std::vector xinp(d); // inplace test + RandomVec(d, x.data()); + std::copy(x.begin(), x.end(), xinp.begin()); + + const T* x_data = x.data(); + T* yref_data = yref.data(); + T* xinp_data = xinp.data(); + // test refer code inplace + ref(&a, x_data, yref_data, d); + ref(&a, xinp_data, xinp_data, d); + ExpectEQ(xinp_data, yref_data, d); + + // test jitcode + auto jitcode = jit::GetJitCode, PlaceType>(d); + if (jitcode) { + VLOG(10) << "Test Jitcode Kernel, size: " << d; + TestAXYNFunc>(jitcode, a, x, yref); + } + + // test all impls in more + jit::KernelKey kkey(KT, PlaceType()); + auto& pool = jit::KernelPool().Instance().AllKernels(); + auto iter = pool.find(kkey); + if (iter != pool.end()) { + auto& impls = iter->second; + for (auto& impl : impls) { + auto i = dynamic_cast>*>( + impl.get()); + if (i && i->UseMe(d)) { + auto more = i->GetFunc(); + VLOG(10) << "Test More Kernel, size: " << d; + TestAXYNFunc>(more, a, x, yref); + } + } + } + // Test result from Get function + VLOG(10) << "Test Get function, size: " << d; + auto tgt = jit::Get, PlaceType>(d); + TestAXYNFunc>(tgt, a, x, yref); + } +} + +TEST(JITKernel, vscal) { + namespace jit = paddle::operators::jit; + TestAXYNKernel(); + TestAXYNKernel(); +} + +TEST(JITKernel, vaddbias) { + namespace jit = paddle::operators::jit; + TestAXYNKernel(); + TestAXYNKernel(); +} + +TEST(JITKernel, pool) { + // TODO(TJ): add some test +} diff --git a/paddle/fluid/operators/math/jit_kernel_refer.h b/paddle/fluid/operators/math/jit_kernel_refer.h index eaca02ba14759ab87602675f2422171fb6d0ab59..b5ee07e748890e2c6fe862d971fcfb012f0cce48 100644 --- a/paddle/fluid/operators/math/jit_kernel_refer.h +++ b/paddle/fluid/operators/math/jit_kernel_refer.h @@ -24,13 +24,6 @@ namespace math { namespace jitkernel { namespace refer { -template -void VAddBias(const T* a, const T* x, T* y, int n) { - for (int i = 0; i < n; ++i) { - y[i] = a[0] + x[i]; - } -} - template void VRelu(const T* x, T* y, int n) { for (int i = 0; i < n; ++i) {