Commit 5f0e7ffb authored by Megvii Engine Team

feat(fallback): add FB_GI_F32_4x12 benchmark

GitOrigin-RevId: cfacf31b2801df7e15df3f8bb9ab34f85ae531b7
Parent f249d387
#include "src/fallback/matrix_mul/generic_strategy.h"
#include "src/fallback/matrix_mul/gi/fp32/common.h"
using namespace megdnn;
using namespace matmul::fallback;
namespace {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
@@ -18,6 +10,15 @@ namespace {
#endif
#endif
#endif
#include "src/fallback/matrix_mul/generic_strategy.h"
#include "src/fallback/matrix_mul/gi/fp32/common.h"
using namespace megdnn;
using namespace matmul::fallback;
namespace {
void kern_4x12(
const float* packA, const float* packB, int K, float* output, int LDC,
bool is_first_k, int m_remain) {
@@ -615,7 +616,6 @@ void kern_4x4(
}
}
}
#pragma GCC diagnostic pop
void gi_sgemm_4x12_pack_A_n(
float* outptr, const float* inptr, int ldin, int y0, int ymax, int k0,
...
@@ -571,6 +571,12 @@ TEST_F(ARMV7, BENCHMARK_MATRIX_MUL_INT32_MK_4X2X16) {
}
}
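// Benchmarks the ARMV7_F32 algorithm over the standard matmul shape set.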
TEST_F(ARMV7, BENCHMARK_MATRIX_MUL_ARMV7_F32) {
auto args = matrix_mul::get_benchmark_matmul_args();
matrix_mul::benchmark_single_algo(
handle(), args, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
"ARMV7_F32", param::MatrixMul::Format::DEFAULT);
}
#endif
// vim: syntax=cpp.doxygen
@@ -429,6 +429,68 @@ void matrix_mul::benchmark_with_contrast(
}
}
void matrix_mul::benchmark_single_algo(
Handle* handle, const std::vector<TestArg>& args, DType A_dtype, DType B_dtype,
DType C_dtype, const char* algo, param::MatrixMul::Format format) {
using Param = MatrixMul::Param;
megdnn_assert(A_dtype.enumv() == B_dtype.enumv());
Benchmarker<MatrixMul> benchmark(handle);
constexpr size_t RUNS = 50;
if (algo) {
benchmark.set_before_exec_callback(AlgoChecker<MatrixMul>(algo));
}
benchmark.set_dtype(0, A_dtype).set_dtype(1, B_dtype).set_dtype(2, C_dtype);
benchmark.set_times(RUNS);
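// Runs one (m, n, k) case and returns the average time per run in milliseconds.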
auto bench = [](Benchmarker<MatrixMul>& benchmark, Param param,
param::MatrixMul::Format format, size_t m, size_t n, size_t k,
size_t pack_size) -> float {
param.format = format;
benchmark.set_param(param);
float used_algo = 1.0;
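// DEFAULT uses plain 2-D operand shapes; MK-packed formats carry extra pack_size dims.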
if (format == param::MatrixMul::Format::DEFAULT) {
size_t A0 = m * pack_size, A1 = k * pack_size, B0 = k * pack_size, B1 = n;
TensorShape A, B;
if (param.transposeA) {
std::swap(A0, A1);
}
if (param.transposeB) {
std::swap(B0, B1);
}
used_algo = benchmark.execs({{A0, A1}, {B0, B1}, {}}) / RUNS;
} else {
size_t A0 = m, A1 = k, B0 = k, B1 = n;
if (param.transposeA) {
std::swap(A0, A1);
}
if (param.transposeB) {
std::swap(B0, B1);
}
used_algo =
benchmark.execs(
{{A0, A1, pack_size, pack_size}, {B0, B1, pack_size}, {}}) /
RUNS;
}
return used_algo;
};
size_t pack_size = MatrixMulForward::pack_size(format);
for (auto& arg : args) {
Param param;
param.transposeA = arg.mask & 0x1;
param.transposeB = arg.mask & 0x2;
auto used_algo =
bench(benchmark, param, format, arg.m, arg.n, arg.k, pack_size);
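// 2*M*K*N FLOPs scaled to MFLOPs; dividing by the per-run time in ms prints as Gflops.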
float computations = 2.f * arg.m * pack_size * arg.k * pack_size * arg.n * 1e-6;
printf("run: {(%zu, %zu) x (%zu, %zu)} %f ms %f Gflops\n", arg.m * pack_size,
arg.k * pack_size, arg.k * pack_size, arg.n, used_algo,
computations / used_algo);
}
}
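A quick sanity check on the throughput figure printed above (editorial note, not part of the commit): computations is 2*M*K*N FLOPs scaled by 1e-6, i.e. MFLOPs, and used_algo is the average time per run in milliseconds, so their ratio prints directly as Gflops. A minimal standalone sketch of the same arithmetic, with the 256x256x256 shape and the 0.5 ms timing purely hypothetical:

#include <cstdio>

int main() {
    // Hypothetical shape and timing; mirrors the arithmetic in benchmark_single_algo.
    int m = 256, k = 256, n = 256, pack_size = 1;
    float used_ms = 0.5f;  // assumed average time per run, in milliseconds
    float computations = 2.f * m * pack_size * k * pack_size * n * 1e-6;  // MFLOPs
    printf("run: {(%d, %d) x (%d, %d)} %f ms %f Gflops\n", m, k, k, n, used_ms,
           computations / used_ms);  // prints roughly 67.1 Gflops
    return 0;
}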
#endif
// vim: syntax=cpp.doxygen
@@ -85,6 +85,10 @@ void benchmark_with_contrast(
DType contrast_B_dtype = dtype::Float32{},
DType contrast_C_dtype = dtype::Float32{}, const char* contrast_algo = nullptr,
param::MatrixMul::Format contrast_format = param::MatrixMul::Format::DEFAULT);
void benchmark_single_algo(
Handle* handle, const std::vector<TestArg>& args, DType A_dtype, DType B_dtype,
DType C_dtype, const char* algo = nullptr,
param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT);
#endif
} // namespace matrix_mul
...
@@ -154,6 +154,16 @@ TEST_F(FALLBACK, BATCHED_MATRIX_MUL) {
checker.execs({AL, BL, {}});
}
}
#if MEGDNN_WITH_BENCHMARK
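// Benchmark the new FB_GI_F32_4x12 fallback kernel over the standard matmul shape set.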
TEST_F(FALLBACK, BENCHMARK_MATRIX_MUL_FB_GI_F32_4x12) {
auto args = matrix_mul::get_benchmark_matmul_args();
matrix_mul::benchmark_single_algo(
handle(), args, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
"FB_GI_F32_4x12", param::MatrixMul::Format::DEFAULT);
}
#endif
} // namespace test
} // namespace megdnn
...