diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 2ad87e414bd3d8cd522c8d64ecd5fcf9371a0a63..a7e5eb6cf4a19bd6a53522b2bb4986651a8cf910 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -201,6 +201,77 @@ void BenchAXYNKernel() { } } +// return this function avg time +template +double BenchXYNFunc(const typename KernelTuples::func_type tgt, + const std::vector& x, + std::vector& y) { // NOLINT + const T* x_data = x.data(); + T* y_data = y.data(); + const int d = y.size(); + for (int i = 0; i < FLAGS_burning; ++i) { + tgt(x_data, y_data, d); + } + auto start = GetCurrentUS(); + for (int i = 0; i < FLAGS_repeat; ++i) { + tgt(x_data, y_data, d); + } + auto end = GetCurrentUS(); + return (end - start) / FLAGS_repeat; +} + +template +void BenchXYNKernel() { + namespace jit = paddle::operators::jit; + for (int d : TestSizes()) { + std::vector> infos; + std::vector x(d), y(d); + RandomVec(d, x.data()); + // test refer + auto refer = jit::GetRefer>(); + if (refer) { + auto res = BenchXYNFunc>(refer, x, y); + infos.push_back(std::make_pair("Refer", res)); + } + // test jitcode + auto jitcode = jit::GetJitCode, PlaceType>(d); + if (jitcode) { + auto res = BenchXYNFunc>(jitcode, x, y); + infos.push_back(std::make_pair("JitCode", res)); + } + // test all impls in more + jit::KernelKey kkey(KT, PlaceType()); + auto& pool = jit::KernelPool().Instance().AllKernels(); + auto iter = pool.find(kkey); + if (iter != pool.end()) { + auto& impls = iter->second; + for (auto& impl : impls) { + auto i = + dynamic_cast>*>(impl.get()); + if (i && i->UseMe(d)) { + auto more = i->GetFunc(); + auto res = BenchXYNFunc>(more, x, y); + infos.push_back(std::make_pair("More", res)); + } + } + } + // Test result from Get function + auto tgt = jit::Get, PlaceType>(d); + if (!tgt) { + LOG(ERROR) << "Target can not be empty!"; + } + auto res = BenchXYNFunc>(tgt, x, y); + infos.push_back(std::make_pair("Target", res)); + // print + std::ostringstream loginfos; + loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": "; + for (auto pair : infos) { + loginfos << pair.first << " takes " << pair.second << " us; "; + } + LOG(INFO) << loginfos.str(); + } +} + // Benchmark all jit kernels including jitcode, mkl and refer. // To use this tool, run command: ./benchmark [options...] // Options: @@ -222,4 +293,10 @@ int main(int argc, char* argv[]) { BenchAXYNKernel(); BenchAXYNKernel(); + + BenchXYNKernel(); + BenchXYNKernel(); + BenchXYNKernel(); + BenchXYNKernel(); + BenchXYNKernel(); } diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index c9aaffb8b8d81e3cf7b5b3366d244d363af84247..c010b64c9cbfa3ec6cdaf6f91b7e1340904f1e9a 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -19,28 +19,30 @@ namespace paddle { namespace operators { namespace jit { +#define ONE_CASE(key) \ + case key: \ + return #key + const char* to_string(KernelType kt) { switch (kt) { - case vmul: - return "vmul"; - case vadd: - return "vadd"; - case vaddrelu: - return "vaddrelu"; - case vsub: - return "vsub"; - case vscal: - return "vscal"; - case vexp: - return "vexp"; - case vaddbias: - return "vaddbias"; + ONE_CASE(vmul); + ONE_CASE(vadd); + ONE_CASE(vaddrelu); + ONE_CASE(vsub); + ONE_CASE(vscal); + ONE_CASE(vaddbias); + ONE_CASE(vrelu); + ONE_CASE(videntity); + ONE_CASE(vexp); + ONE_CASE(vsigmoid); + ONE_CASE(vtanh); default: PADDLE_THROW("Not support type: %d", kt); return "NOT JITKernel"; } return nullptr; } +#undef ONE_CASE } // namespace jit } // namespace operators diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 74ecf3dade5d70c2c8a1b8732f98448719ee85a7..29b881b75409e107f37273acc3cbd7083fdbfe08 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -26,7 +26,11 @@ typedef enum { vsub, vscal, vaddbias, - vexp + vrelu, + videntity, + vexp, + vsigmoid, + vtanh } KernelType; template @@ -39,6 +43,13 @@ struct XYZNTuples { template struct AXYNTuples : public XYZNTuples {}; +template +struct XYNTuples { + typedef T data_type; + typedef int attr_type; + typedef void (*func_type)(const T*, T*, int); +}; + // Just for adding to kernel pool without template class Kernel { public: diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index afe3f6ca0f48987fa478844b029e4c442f92ac93..dc07ddb914ba18578c4eb3232226db8df0450cf4 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -13,3 +13,8 @@ USE_JITKERNEL_REFER(vaddrelu) USE_JITKERNEL_REFER(vsub) USE_JITKERNEL_REFER(vscal) USE_JITKERNEL_REFER(vaddbias) +USE_JITKERNEL_REFER(vrelu) +USE_JITKERNEL_REFER(videntity) +USE_JITKERNEL_REFER(vexp) +USE_JITKERNEL_REFER(vsigmoid) +USE_JITKERNEL_REFER(vtanh) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index 4e9c530344b8f624a192a8068418e80357d8174a..f716ca89c58fab92a081cbdb4111e5a5007ae4c5 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -29,4 +29,10 @@ REGISTER_REFER_KERNEL(vsub, VSub); REGISTER_REFER_KERNEL(vscal, VScal); REGISTER_REFER_KERNEL(vaddbias, VAddBias); +REGISTER_REFER_KERNEL(vrelu, VRelu); +REGISTER_REFER_KERNEL(videntity, VIdentity); +REGISTER_REFER_KERNEL(vexp, VExp); +REGISTER_REFER_KERNEL(vsigmoid, VSigmoid); +REGISTER_REFER_KERNEL(vtanh, VTanh); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 32ac5bf2d78210ee82bf814ce7a32ffa2bcacf91..7ef60a2d5395d4e83d00d87a09e4acb1cc20b791 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -66,6 +66,50 @@ void VAddBias(const T* a, const T* x, T* y, int n) { } } +template +void VRelu(const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = x[i] > 0 ? x[i] : 0; + } +} + +template +inline void VIdentity(const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = x[i]; + } +} + +template +void VExp(const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = std::exp(x[i]); + } +} + +template +void VSigmoid(const T* x, T* y, int n) { + // y = 1 / (1 + e^-x) + const T min = SIGMOID_THRESHOLD_MIN; + const T max = SIGMOID_THRESHOLD_MAX; + for (int i = 0; i < n; ++i) { + T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]); + y[i] = static_cast(1) / (static_cast(1) + std::exp(-tmp)); + } +} + +template +void VTanh(const T* x, T* y, int n) { + // y = 2 * sigmoid(2x) - 1 + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * x[i]; + } + VSigmoid(y, y, n); + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * y[i] - static_cast(1); + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -83,6 +127,13 @@ DECLARE_REFER_KERNEL(VSub, XYZNTuples); DECLARE_REFER_KERNEL(VScal, AXYNTuples); DECLARE_REFER_KERNEL(VAddBias, AXYNTuples); +// const T* x, T* y, int n +DECLARE_REFER_KERNEL(VRelu, XYNTuples); +DECLARE_REFER_KERNEL(VIdentity, XYNTuples); +DECLARE_REFER_KERNEL(VExp, XYNTuples); +DECLARE_REFER_KERNEL(VSigmoid, XYNTuples); +DECLARE_REFER_KERNEL(VTanh, XYNTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index ea2cb7b7a42575a92f79c3f3507a165705b3b910..4c9b853b6e62d2a9d6d8a8b4ff297f916a21ad55 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -250,6 +250,106 @@ TEST(JITKernel, vaddbias) { TestAXYNKernel(); } +template +void TestXYNFunc(const typename KernelTuples::func_type tgt, + const std::vector& x, const std::vector& yref) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(yref.size(), x.size()); + const T* x_data = x.data(); + const T* yref_data = yref.data(); + const int d = yref.size(); + std::vector ytgt(d); + T* ytgt_data = ytgt.data(); + // test normal + tgt(x_data, ytgt_data, d); + ExpectEQ(ytgt_data, yref_data, d); + // test inplace x + std::copy(x.begin(), x.end(), ytgt.begin()); + tgt(ytgt_data, ytgt_data, d); + ExpectEQ(ytgt_data, yref_data, d); +} + +template +void TestXYNKernel() { + namespace jit = paddle::operators::jit; + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + for (int d : TestSizes()) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + + std::vector x(d), yref(d); + std::vector xinp(d); // inplace test + RandomVec(d, x.data()); + std::copy(x.begin(), x.end(), xinp.begin()); + + const T* x_data = x.data(); + T* yref_data = yref.data(); + T* xinp_data = xinp.data(); + // test refer code inplace + ref(x_data, yref_data, d); + ref(xinp_data, xinp_data, d); + ExpectEQ(xinp_data, yref_data, d); + + // test jitcode + auto jitcode = jit::GetJitCode, PlaceType>(d); + if (jitcode) { + VLOG(10) << "Test Jitcode Kernel, size: " << d; + TestXYNFunc>(jitcode, x, yref); + } + + // test all impls in more + jit::KernelKey kkey(KT, PlaceType()); + auto& pool = jit::KernelPool().Instance().AllKernels(); + auto iter = pool.find(kkey); + if (iter != pool.end()) { + auto& impls = iter->second; + for (auto& impl : impls) { + auto i = + dynamic_cast>*>(impl.get()); + if (i && i->UseMe(d)) { + auto more = i->GetFunc(); + VLOG(10) << "Test More Kernel, size: " << d; + TestXYNFunc>(more, x, yref); + } + } + } + // Test result from Get function + VLOG(10) << "Test Get function, size: " << d; + auto tgt = jit::Get, PlaceType>(d); + TestXYNFunc>(tgt, x, yref); + } +} + +TEST(JITKernel, vrelu) { + namespace jit = paddle::operators::jit; + TestXYNKernel(); + TestXYNKernel(); +} + +TEST(JITKernel, videntity) { + namespace jit = paddle::operators::jit; + TestXYNKernel(); + TestXYNKernel(); +} + +TEST(JITKernel, vexp) { + namespace jit = paddle::operators::jit; + TestXYNKernel(); + TestXYNKernel(); +} + +TEST(JITKernel, vsigmoid) { + namespace jit = paddle::operators::jit; + TestXYNKernel(); + TestXYNKernel(); +} + +TEST(JITKernel, vtanh) { + namespace jit = paddle::operators::jit; + TestXYNKernel(); + TestXYNKernel(); +} + TEST(JITKernel, pool) { // TODO(TJ): add some test } diff --git a/paddle/fluid/operators/math/jit_kernel_refer.h b/paddle/fluid/operators/math/jit_kernel_refer.h index b5ee07e748890e2c6fe862d971fcfb012f0cce48..a03e851de56d3dbf1a2a95b57e531259cd5fc578 100644 --- a/paddle/fluid/operators/math/jit_kernel_refer.h +++ b/paddle/fluid/operators/math/jit_kernel_refer.h @@ -24,46 +24,6 @@ namespace math { namespace jitkernel { namespace refer { -template -void VRelu(const T* x, T* y, int n) { - for (int i = 0; i < n; ++i) { - y[i] = x[i] > 0 ? x[i] : 0; - } -} - -template -inline void VIdentity(const T* x, T* y, int n) {} - -template -void VExp(const T* x, T* y, int n) { - for (int i = 0; i < n; ++i) { - y[i] = std::exp(x[i]); - } -} - -template -void VSigmoid(const T* x, T* y, int n) { - // y = 1 / (1 + e^-x) - const T min = SIGMOID_THRESHOLD_MIN; - const T max = SIGMOID_THRESHOLD_MAX; - for (int i = 0; i < n; ++i) { - T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]); - y[i] = static_cast(1) / (static_cast(1) + std::exp(-tmp)); - } -} - -template -void VTanh(const T* x, T* y, int n) { - // y = 2 * sigmoid(2x) - 1 - for (int i = 0; i < n; ++i) { - y[i] = static_cast(2) * x[i]; - } - VSigmoid(y, y, n); - for (int i = 0; i < n; ++i) { - y[i] = static_cast(2) * y[i] - static_cast(1); - } -} - template void (*getActFunc(const std::string& type))(const T*, T*, int) { // NOLINT if (type == "sigmoid") {