Commit 579d7582 authored by T tensor-tang

fix jitkernel tests and refine benchmark

test=develop
Parent e2ba9668
@@ -26,6 +26,49 @@
DEFINE_int32(burning, 10, "Burning times.");
DEFINE_int32(repeat, 3000, "Repeat times.");
DEFINE_int32(max_size, 1000, "The Max size would be tested.");
DEFINE_string(filter, "", "The Benchmark name would be run.");
class BenchJITKernel {
public:
BenchJITKernel() = default;
virtual ~BenchJITKernel() = default;
virtual void Run() = 0;
virtual const char* Name() = 0;
virtual const char* Dtype() = 0;
virtual const char* Place() = 0;
};
static std::vector<BenchJITKernel*> g_all_benchmarks;
BenchJITKernel* InsertBenchmark(BenchJITKernel* b) {
g_all_benchmarks.push_back(b);
return b;
}
#define BENCH_JITKERNEL(name, dtype, place) \
class BenchJITKernel_##name##_##dtype##_##place##_ : public BenchJITKernel { \
public: \
const char* Name() override { return #name; } \
const char* Dtype() override { return #dtype; } \
const char* Place() override { return #place; } \
void Run() override; \
}; \
static auto inserted_##name##_##dtype##_##place##_ = \
InsertBenchmark(new BenchJITKernel_##name##_##dtype##_##place##_()); \
void BenchJITKernel_##name##_##dtype##_##place##_::Run()
#define BENCH_FP32_CPU(name) BENCH_JITKERNEL(name, FP32, CPU)
void RUN_ALL_BENCHMARK() {
for (auto p : g_all_benchmarks) {
if (!FLAGS_filter.empty() && FLAGS_filter != p->Name()) {
continue;
}
LOG(INFO) << "Benchmark " << p->Name() << "." << p->Dtype() << "."
<< p->Place();
p->Run();
}
}
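For reference, BENCH_FP32_CPU(kVMul) { BenchXYZNKernel<jit::kVMul, T, PlaceType>(); } (used further down) expands roughly to the sketch below; the static InsertBenchmark() call registers each benchmark into g_all_benchmarks during static initialization, before main() runs:

class BenchJITKernel_kVMul_FP32_CPU_ : public BenchJITKernel {
 public:
  const char* Name() override { return "kVMul"; }
  const char* Dtype() override { return "FP32"; }
  const char* Place() override { return "CPU"; }
  void Run() override;
};
static auto inserted_kVMul_FP32_CPU_ =
    InsertBenchmark(new BenchJITKernel_kVMul_FP32_CPU_());
// Run() is declared in the class and defined out of line, so the braces that
// follow the macro invocation become its body.
void BenchJITKernel_kVMul_FP32_CPU_::Run() {
  BenchXYZNKernel<jit::kVMul, T, PlaceType>();
}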
template <typename T>
void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
@@ -228,49 +271,70 @@ void BenchMatMulKernel() {
}
}
using T = float;
using PlaceType = paddle::platform::CPUPlace;
// xyzn
BENCH_FP32_CPU(kVMul) { BenchXYZNKernel<jit::kVMul, T, PlaceType>(); }
BENCH_FP32_CPU(kVAdd) { BenchXYZNKernel<jit::kVAdd, T, PlaceType>(); }
BENCH_FP32_CPU(kVAddRelu) { BenchXYZNKernel<jit::kVAddRelu, T, PlaceType>(); }
BENCH_FP32_CPU(kVSub) { BenchXYZNKernel<jit::kVSub, T, PlaceType>(); }
// axyn
BENCH_FP32_CPU(kVScal) { BenchAXYNKernel<jit::kVScal, T, PlaceType>(); }
BENCH_FP32_CPU(kVAddBias) { BenchAXYNKernel<jit::kVAddBias, T, PlaceType>(); }
// xyn
BENCH_FP32_CPU(kVRelu) { BenchXYNKernel<jit::kVRelu, T, PlaceType>(); }
BENCH_FP32_CPU(kVIdentity) { BenchXYNKernel<jit::kVIdentity, T, PlaceType>(); }
BENCH_FP32_CPU(kVSquare) { BenchXYNKernel<jit::kVSquare, T, PlaceType>(); }
BENCH_FP32_CPU(kVExp) { BenchXYNKernel<jit::kVExp, T, PlaceType>(); }
BENCH_FP32_CPU(kVSigmoid) { BenchXYNKernel<jit::kVSigmoid, T, PlaceType>(); }
BENCH_FP32_CPU(kVTanh) { BenchXYNKernel<jit::kVTanh, T, PlaceType>(); }
// lstm and peephole
BENCH_FP32_CPU(kLSTMCtHt) { BenchLSTMKernel<jit::kLSTMCtHt, T, PlaceType>(); }
BENCH_FP32_CPU(kLSTMC1H1) { BenchLSTMKernel<jit::kLSTMC1H1, T, PlaceType>(); }
// gru functions
BENCH_FP32_CPU(kGRUH1) { BenchGRUKernel<jit::kGRUH1, T, PlaceType>(); }
BENCH_FP32_CPU(kGRUHtPart1) {
BenchGRUKernel<jit::kGRUHtPart1, T, PlaceType>();
}
BENCH_FP32_CPU(kGRUHtPart2) {
BenchGRUKernel<jit::kGRUHtPart2, T, PlaceType>();
}
// seq pool function
BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel<jit::kSeqPool, T, PlaceType>(); }
// matmul
BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, PlaceType>(); }
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// Options:
//     --burning: the burning time before count
//     --repeat: the repeat times
//     --max_size: the max size would be tested
// --filter: the bench name would be run
int main(int argc, char* argv[]) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
LOG(INFO) << "Burning " << FLAGS_burning << " times, Repeat " << FLAGS_repeat
          << " times.";
using T = float;
using PlaceType = paddle::platform::CPUPlace;
// xyzn
BenchXYZNKernel<jit::kVMul, T, PlaceType>();
BenchXYZNKernel<jit::kVAdd, T, PlaceType>();
BenchXYZNKernel<jit::kVAddRelu, T, PlaceType>();
BenchXYZNKernel<jit::kVSub, T, PlaceType>();
// axyn
BenchAXYNKernel<jit::kVScal, T, PlaceType>();
BenchAXYNKernel<jit::kVAddBias, T, PlaceType>();
// xyn
BenchXYNKernel<jit::kVRelu, T, PlaceType>();
BenchXYNKernel<jit::kVIdentity, T, PlaceType>();
BenchXYNKernel<jit::kVSquare, T, PlaceType>();
BenchXYNKernel<jit::kVExp, T, PlaceType>();
BenchXYNKernel<jit::kVSigmoid, T, PlaceType>();
BenchXYNKernel<jit::kVTanh, T, PlaceType>();
// lstm and peephole
BenchLSTMKernel<jit::kLSTMCtHt, T, PlaceType>();
BenchLSTMKernel<jit::kLSTMC1H1, T, PlaceType>();
// gru functions
BenchGRUKernel<jit::kGRUH1, T, PlaceType>();
BenchGRUKernel<jit::kGRUHtPart1, T, PlaceType>();
BenchGRUKernel<jit::kGRUHtPart2, T, PlaceType>();
// seq pool function
BenchSeqPoolKernel<jit::kSeqPool, T, PlaceType>();
// matmul
BenchMatMulKernel<jit::kMatMul, T, PlaceType>();
RUN_ALL_BENCHMARK();
}
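With the benchmarks registered through BENCH_JITKERNEL, a single kernel can now be benchmarked by name, for example ./benchmark --filter=kVMul (binary name as in the usage comment above). Note that RUN_ALL_BENCHMARK compares the filter against each registered Name() for exact equality, so partial matches are not honored.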
@@ -22,6 +22,8 @@
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/place.h"
static double acc = 1e-5;
template <typename T>
void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
               const T upper = static_cast<T>(20.f)) {
@@ -37,7 +39,7 @@ template <typename T>
void ExpectEQ(const T* target, const T* refer, int n) {
if (std::is_floating_point<T>::value) {
for (int i = 0; i < n; ++i) {
EXPECT_NEAR(target[i], refer[i], 1e-5);
EXPECT_NEAR(target[i], refer[i], acc);
}
} else {
for (int i = 0; i < n; ++i) {
@@ -62,7 +64,9 @@ namespace jit = paddle::operators::jit;
template <typename KernelTuples, typename... Args>
struct TestFuncWithRefer {
void operator()(const typename KernelTuples::func_type tgt, Args... args) {}
void operator()(const typename KernelTuples::func_type tgt, Args... args) {
LOG(FATAL) << "Should specify this function.";
}
};
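With the new LOG(FATAL) body, the primary TestFuncWithRefer template is no longer a silent no-op: invoking it for a kernel type that lacks a matching specialization now aborts the test immediately instead of passing vacuously.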
template <typename T>
@@ -140,7 +144,8 @@ struct TestFuncWithRefer<jit::XYNTuples<T>, std::vector<T>, std::vector<T>> {
template <typename T>
struct TestFuncWithRefer<jit::LSTMTuples<T>, std::vector<T>, std::vector<T>,
std::vector<T>, std::vector<T>, std::vector<T>> {
std::vector<T>, std::vector<T>, std::vector<T>,
typename jit::LSTMTuples<T>::attr_type> {
void operator()(const typename jit::LSTMTuples<T>::func_type tgt,
const std::vector<T>& xsrc, const std::vector<T>& wp,
const std::vector<T>& ct_1, const std::vector<T>& ct_ref,
@@ -185,7 +190,8 @@ struct TestFuncWithRefer<jit::LSTMTuples<T>, std::vector<T>, std::vector<T>,
template <typename T>
struct TestFuncWithRefer<jit::GRUTuples<T>, std::vector<T>, std::vector<T>,
std::vector<T>> {
std::vector<T>,
typename jit::GRUTuples<T>::attr_type> {
void operator()(const typename jit::GRUTuples<T>::func_type tgt,
const std::vector<T>& xsrc, const std::vector<T>& ht_1,
const std::vector<T>& ht_ref,
@@ -212,8 +218,8 @@ struct TestFuncWithRefer<jit::GRUTuples<T>, std::vector<T>, std::vector<T>,
};
template <typename T>
struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>,
std::vector<T>> {
struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>, std::vector<T>,
typename jit::SeqPoolTuples<T>::attr_type> {
void operator()(const typename jit::SeqPoolTuples<T>::func_type tgt,
const std::vector<T>& x, const std::vector<T>& yref,
const typename jit::SeqPoolTuples<T>::attr_type& attr) {
@@ -385,8 +391,8 @@ void TestLSTMKernel() {
std::vector<T> xsrc(4 * d), wp(3 * d), ct_1(d);
std::vector<T> ct_ref(d), ht_ref(d), checked(2 * d);
RandomVec<T>(4 * d, xsrc.data(), -2.f, 2.f);
RandomVec<T>(3 * d, wp.data(), -2.f, 2.f);
RandomVec<T>(3 * d, wp.data(), -1.f, 1.f);
RandomVec<T>(d, ct_1.data(), -2.f, 2.f);
RandomVec<T>(d, ct_1.data(), -1.f, 1.f);
// x could be changed after compute, so copy to save src
std::vector<T> x(xsrc.size());
std::copy(xsrc.begin(), xsrc.end(), x.begin());
@@ -481,14 +487,17 @@ void TestSeqPoolKernel() {
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestMatMulKernel() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
auto last_acc = acc;
// TODO(intel): this should be acc issue of MKL
acc = 1e-3;
for (int m : {1, 2, 3, 4}) {
for (int n : {1, 2, 3, 4}) {
for (int k : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::MatMulTuples<T>>();
EXPECT_TRUE(ref != nullptr);
std::vector<T> a(m * k), b(k * n), c(m * n);
RandomVec<T>(m * k, a.data(), -0.2f, 0.2f);
RandomVec<T>(m * k, a.data(), -2.f, 2.f);
RandomVec<T>(k * n, b.data(), -0.2f, 0.2f);
RandomVec<T>(k * n, b.data(), -2.f, 2.f);
const T* a_data = a.data();
const T* b_data = b.data();
T* c_data = c.data();
@@ -498,6 +507,7 @@ void TestMatMulKernel() {
}
}
}
acc = last_acc;
}
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
......