diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index ca636b020c222fa54f36d04cddf005ff02b14323..4e5d530251ea1bb9c16b18fc70707bfa89883ee3 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -51,251 +51,108 @@ std::vector TestSizes() { return s; } -// return this function avg time -template -double BenchXYZNFunc(const typename KernelTuples::func_type tgt, - const std::vector& x, const std::vector& y, - std::vector& z) { // NOLINT - const T* x_data = x.data(); - const T* y_data = y.data(); - const int d = z.size(); - T* z_data = z.data(); - - for (int i = 0; i < FLAGS_burning; ++i) { - tgt(x_data, y_data, z_data, d); +template +struct BenchFunc { + // return this function avg time + double operator()(const typename KernelTuples::func_type tgt, Args... args) { + for (int i = 0; i < FLAGS_burning; ++i) { + tgt(args...); + } + auto start = GetCurrentUS(); + for (int i = 0; i < FLAGS_repeat; ++i) { + tgt(args...); + } + auto end = GetCurrentUS(); + return (end - start) / FLAGS_repeat; + } +}; + +namespace jit = paddle::operators::jit; + +template +void BenchAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { + BenchFunc benchmark; + std::vector> infos; + // test refer + auto refer = jit::GetRefer(); + if (!refer) { + LOG(FATAL) << "Refer can not be empty!"; + } + infos.push_back(std::make_pair("Refer", benchmark(refer, args...))); + + // test jitcode + auto jitcode = jit::GetJitCode(attr); + if (jitcode) { + infos.push_back(std::make_pair("JitCode", benchmark(jitcode, args...))); + } + // test all impls in more + jit::KernelKey kkey(KT, PlaceType()); + auto& pool = jit::KernelPool().Instance().AllKernels(); + auto iter = pool.find(kkey); + if (iter != pool.end()) { + auto& impls = iter->second; + for (auto& impl : impls) { + auto i = dynamic_cast*>(impl.get()); + if (i && i->UseMe(attr)) { + auto more = i->GetFunc(); + infos.push_back(std::make_pair("More", benchmark(more, args...))); + } + } } - auto start = GetCurrentUS(); - for (int i = 0; i < FLAGS_repeat; ++i) { - tgt(x_data, y_data, z_data, d); + // Test result from Get function + auto tgt = jit::Get(attr); + if (!tgt) { + LOG(FATAL) << "Target can not be empty!"; } - auto end = GetCurrentUS(); - return (end - start) / FLAGS_repeat; + infos.push_back(std::make_pair("Target", benchmark(tgt, args...))); + + // print + std::ostringstream loginfos; + loginfos << "Kernel Type " << jit::to_string(KT) << ": " << attr << ": "; + for (auto pair : infos) { + loginfos << pair.first << " takes " << pair.second << " us; "; + } + LOG(INFO) << loginfos.str(); } template void BenchXYZNKernel() { - namespace jit = paddle::operators::jit; for (int d : TestSizes()) { - std::vector> infos; std::vector x(d), y(d), z(d); RandomVec(d, x.data()); RandomVec(d, y.data()); - // refer - auto refer = jit::GetRefer>(); - if (refer) { - auto res = BenchXYZNFunc>(refer, x, y, z); - infos.push_back(std::make_pair("Refer", res)); - } - - // test jitcode - auto jitcode = jit::GetJitCode, PlaceType>(d); - if (jitcode) { - auto res = BenchXYZNFunc>(jitcode, x, y, z); - infos.push_back(std::make_pair("JitCode", res)); - } - - // test all impls in more - jit::KernelKey kkey(KT, PlaceType()); - auto& pool = jit::KernelPool().Instance().AllKernels(); - auto iter = pool.find(kkey); - if (iter != pool.end()) { - auto& impls = iter->second; - for (auto& impl : impls) { - auto i = dynamic_cast>*>( - impl.get()); - if (i && i->UseMe(d)) { - auto more = i->GetFunc(); - auto res = BenchXYZNFunc>(more, x, y, z); - infos.push_back(std::make_pair("More", res)); - } - } - } - - // Test result from Get function - auto tgt = jit::Get, PlaceType>(d); - if (!tgt) { - LOG(ERROR) << "Target can not be empty!"; - } - auto res = BenchXYZNFunc>(tgt, x, y, z); - infos.push_back(std::make_pair("Target", res)); - - // print - std::ostringstream loginfos; - loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": "; - for (auto pair : infos) { - loginfos << pair.first << " takes " << pair.second << " us; "; - } - LOG(INFO) << loginfos.str(); + BenchAllImpls, PlaceType>(d, x.data(), y.data(), + z.data(), d); } } -// return this function avg time -template -double BenchAXYNFunc(const typename KernelTuples::func_type tgt, const T a, - const std::vector& x, - std::vector& y) { // NOLINT - const T* x_data = x.data(); - T* y_data = y.data(); - const int d = y.size(); - for (int i = 0; i < FLAGS_burning; ++i) { - tgt(&a, x_data, y_data, d); - } - auto start = GetCurrentUS(); - for (int i = 0; i < FLAGS_repeat; ++i) { - tgt(&a, x_data, y_data, d); - } - auto end = GetCurrentUS(); - return (end - start) / FLAGS_repeat; -} - template void BenchAXYNKernel() { - namespace jit = paddle::operators::jit; for (int d : TestSizes()) { - std::vector> infos; const T a = static_cast(3); std::vector x(d), y(d); RandomVec(d, x.data()); - // test refer - auto refer = jit::GetRefer>(); - if (refer) { - auto res = BenchAXYNFunc>(refer, a, x, y); - infos.push_back(std::make_pair("Refer", res)); - } - // test jitcode - auto jitcode = jit::GetJitCode, PlaceType>(d); - if (jitcode) { - auto res = BenchAXYNFunc>(jitcode, a, x, y); - infos.push_back(std::make_pair("JitCode", res)); - } - // test all impls in more - jit::KernelKey kkey(KT, PlaceType()); - auto& pool = jit::KernelPool().Instance().AllKernels(); - auto iter = pool.find(kkey); - if (iter != pool.end()) { - auto& impls = iter->second; - for (auto& impl : impls) { - auto i = dynamic_cast>*>( - impl.get()); - if (i && i->UseMe(d)) { - auto more = i->GetFunc(); - auto res = BenchAXYNFunc>(more, a, x, y); - infos.push_back(std::make_pair("More", res)); - } - } - } - // Test result from Get function - auto tgt = jit::Get, PlaceType>(d); - if (!tgt) { - LOG(ERROR) << "Target can not be empty!"; - } - auto res = BenchAXYNFunc>(tgt, a, x, y); - infos.push_back(std::make_pair("Target", res)); - // print - std::ostringstream loginfos; - loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": "; - for (auto pair : infos) { - loginfos << pair.first << " takes " << pair.second << " us; "; - } - LOG(INFO) << loginfos.str(); - } -} - -// return this function avg time -template -double BenchXYNFunc(const typename KernelTuples::func_type tgt, - const std::vector& x, - std::vector& y) { // NOLINT - const T* x_data = x.data(); - T* y_data = y.data(); - const int d = y.size(); - for (int i = 0; i < FLAGS_burning; ++i) { - tgt(x_data, y_data, d); + BenchAllImpls, PlaceType>(d, &a, x.data(), y.data(), + d); } - auto start = GetCurrentUS(); - for (int i = 0; i < FLAGS_repeat; ++i) { - tgt(x_data, y_data, d); - } - auto end = GetCurrentUS(); - return (end - start) / FLAGS_repeat; } template void BenchXYNKernel() { - namespace jit = paddle::operators::jit; for (int d : TestSizes()) { - std::vector> infos; std::vector x(d), y(d); RandomVec(d, x.data()); - // test refer - auto refer = jit::GetRefer>(); - if (refer) { - auto res = BenchXYNFunc>(refer, x, y); - infos.push_back(std::make_pair("Refer", res)); - } - // test jitcode - auto jitcode = jit::GetJitCode, PlaceType>(d); - if (jitcode) { - auto res = BenchXYNFunc>(jitcode, x, y); - infos.push_back(std::make_pair("JitCode", res)); - } - // test all impls in more - jit::KernelKey kkey(KT, PlaceType()); - auto& pool = jit::KernelPool().Instance().AllKernels(); - auto iter = pool.find(kkey); - if (iter != pool.end()) { - auto& impls = iter->second; - for (auto& impl : impls) { - auto i = - dynamic_cast>*>(impl.get()); - if (i && i->UseMe(d)) { - auto more = i->GetFunc(); - auto res = BenchXYNFunc>(more, x, y); - infos.push_back(std::make_pair("More", res)); - } - } - } - // Test result from Get function - auto tgt = jit::Get, PlaceType>(d); - if (!tgt) { - LOG(ERROR) << "Target can not be empty!"; - } - auto res = BenchXYNFunc>(tgt, x, y); - infos.push_back(std::make_pair("Target", res)); - // print - std::ostringstream loginfos; - loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": "; - for (auto pair : infos) { - loginfos << pair.first << " takes " << pair.second << " us; "; - } - LOG(INFO) << loginfos.str(); + BenchAllImpls, PlaceType>(d, x.data(), y.data(), d); } } -// return this function avg time -template -double BenchLSTMFunc(const typename KernelTuples::func_type tgt, - const paddle::operators::jit::lstm_attr_t* attr, - paddle::operators::jit::lstm_t* step) { - for (int i = 0; i < FLAGS_burning; ++i) { - tgt(step, attr); - } - auto start = GetCurrentUS(); - for (int i = 0; i < FLAGS_repeat; ++i) { - tgt(step, attr); - } - auto end = GetCurrentUS(); - return (end - start) / FLAGS_repeat; -} - template void BenchLSTMKernel() { - namespace jit = paddle::operators::jit; for (bool use_peephole : {true, false}) { for (int d : TestSizes()) { const jit::lstm_attr_t attr(d, jit::vsigmoid, jit::vtanh, jit::vtanh, use_peephole); - std::vector> infos; std::vector x(4 * d), ct_1(d), ct(d), ht(d), wp(3 * d), checked(2 * d); RandomVec(4 * d, x.data(), -2.f, 2.f); RandomVec(3 * d, wp.data(), -2.f, 2.f); @@ -315,77 +172,15 @@ void BenchLSTMKernel() { step.wp = wp_data; step.checked = checked_data; } - - // test refer - auto refer = jit::GetRefer>(); - if (refer) { - auto res = BenchLSTMFunc>(refer, &attr, &step); - infos.push_back(std::make_pair("Refer", res)); - } - // test jitcode - auto jitcode = jit::GetJitCode, PlaceType>(attr); - if (jitcode) { - auto res = BenchLSTMFunc>(jitcode, &attr, &step); - infos.push_back(std::make_pair("JitCode", res)); - } - // test all impls in more - jit::KernelKey kkey(KT, PlaceType()); - auto& pool = jit::KernelPool().Instance().AllKernels(); - auto iter = pool.find(kkey); - if (iter != pool.end()) { - auto& impls = iter->second; - for (auto& impl : impls) { - auto i = dynamic_cast>*>( - impl.get()); - if (i && i->UseMe(attr)) { - auto more = i->GetFunc(); - auto res = BenchLSTMFunc>(more, &attr, &step); - infos.push_back(std::make_pair("More", res)); - } - } - } - // Test result from Get function - auto tgt = jit::Get, PlaceType>(attr); - if (!tgt) { - LOG(ERROR) << "Target can not be empty!"; - } - auto res = BenchLSTMFunc>(tgt, &attr, &step); - infos.push_back(std::make_pair("Target", res)); - // print - std::ostringstream loginfos; - loginfos << "Kernel Type: " << jit::to_string(KT) - << ", Sigmoid,Tanh,Tanh, " << (use_peephole ? "Peephole_" : "") - << " size " << d << ": "; - for (auto pair : infos) { - loginfos << pair.first << " takes " << pair.second << " us; "; - } - LOG(INFO) << loginfos.str(); + BenchAllImpls, PlaceType>(attr, &step, &attr); } } } -// return this function avg time -template -double BenchGRUFunc(const typename KernelTuples::func_type tgt, - const paddle::operators::jit::gru_attr_t* attr, - paddle::operators::jit::gru_t* step) { - for (int i = 0; i < FLAGS_burning; ++i) { - tgt(step, attr); - } - auto start = GetCurrentUS(); - for (int i = 0; i < FLAGS_repeat; ++i) { - tgt(step, attr); - } - auto end = GetCurrentUS(); - return (end - start) / FLAGS_repeat; -} - template void BenchGRUKernel() { - namespace jit = paddle::operators::jit; for (int d : TestSizes()) { const jit::gru_attr_t attr(d, jit::vsigmoid, jit::vtanh); - std::vector> infos; std::vector x(3 * d), ht_1(d), ht(d); RandomVec(3 * d, x.data(), -2.f, 2.f); RandomVec(d, ht_1.data(), -2.f, 2.f); @@ -396,50 +191,7 @@ void BenchGRUKernel() { step.gates = x_data; step.ht_1 = ht_1_data; step.ht = ht_data; - - // test refer - auto refer = jit::GetRefer>(); - if (refer) { - auto res = BenchGRUFunc>(refer, &attr, &step); - infos.push_back(std::make_pair("Refer", res)); - } - // test jitcode - auto jitcode = jit::GetJitCode, PlaceType>(attr); - if (jitcode) { - auto res = BenchGRUFunc>(jitcode, &attr, &step); - infos.push_back(std::make_pair("JitCode", res)); - } - // test all impls in more - jit::KernelKey kkey(KT, PlaceType()); - auto& pool = jit::KernelPool().Instance().AllKernels(); - auto iter = pool.find(kkey); - if (iter != pool.end()) { - auto& impls = iter->second; - for (auto& impl : impls) { - auto i = - dynamic_cast>*>(impl.get()); - if (i && i->UseMe(attr)) { - auto more = i->GetFunc(); - auto res = BenchGRUFunc>(more, &attr, &step); - infos.push_back(std::make_pair("More", res)); - } - } - } - // Test result from Get function - auto tgt = jit::Get, PlaceType>(attr); - if (!tgt) { - LOG(ERROR) << "Target can not be empty!"; - } - auto res = BenchGRUFunc>(tgt, &attr, &step); - infos.push_back(std::make_pair("Target", res)); - // print - std::ostringstream loginfos; - loginfos << "Kernel Type: " << jit::to_string(KT) << ", Sigmoid,Tanh, size " - << d << ": "; - for (auto pair : infos) { - loginfos << pair.first << " takes " << pair.second << " us; "; - } - LOG(INFO) << loginfos.str(); + BenchAllImpls, PlaceType>(attr, &step, &attr); } } @@ -456,16 +208,17 @@ int main(int argc, char* argv[]) { << " times."; using T = float; using PlaceType = paddle::platform::CPUPlace; - namespace jit = paddle::operators::jit; + // xyzn BenchXYZNKernel(); BenchXYZNKernel(); BenchXYZNKernel(); BenchXYZNKernel(); + // axyn BenchAXYNKernel(); BenchAXYNKernel(); - // act + // xyn BenchXYNKernel(); BenchXYNKernel(); BenchXYNKernel(); diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index 302e70caa7e423ae80a23eb1983fe9108fa5aba5..3431c22111f948b3d5261a59feb761668300ce24 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include "paddle/fluid/operators/jit/gen_base.h" @@ -124,6 +125,19 @@ const char* to_string(KernelType kt); KernelType to_kerneltype(const std::string& act); +inline std::ostream& operator<<(std::ostream& os, const lstm_attr_t& attr) { + os << "dim_size[" << attr.d << "],act_gate[" << to_string(attr.act_gate) + << "],act_cand[" << to_string(attr.act_cand) << "],act_cell[" + << to_string(attr.act_cell) << "],use_peephole[" + << (attr.use_peephole ? "True" : "False") << "]"; + return os; +} +inline std::ostream& operator<<(std::ostream& os, const gru_attr_t& attr) { + os << "dim_size[" << attr.d << "],act_gate[" << to_string(attr.act_gate) + << "],act_cand[" << to_string(attr.act_cand) << "]"; + return os; +} + } // namespace jit } // namespace operators } // namespace paddle