Commit 0708bc78 authored by Megvii Engine Team

fix(dnn/cuda): disallow implicit dtype conversion in cublaslt matmul algos

disable tensor op matmul kernels when the input and output tensors are all in f32, to avoid potential accuracy loss from implicit down-conversion

GitOrigin-RevId: 36859cba5a526a7778e12c03ac32815144fe0505
Parent 3f01112a
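Reviewer note: the fix constrains the cublasLt heuristic query with the CUBLASLT_MATMUL_PREF_MATH_MODE_MASK preference, and only on CUDA versions below 11.0 (the #if CUDA_VERSION < 11000 guard in the first hunk). As a rough illustration of the same policy against the plain cuBLAS v2 API (a minimal sketch, not part of this patch; the helper name is made up):

#include <cublas_v2.h>

// Hypothetical helper (illustration only, not MegEngine code): pin the
// handle to the default math mode so cuBLAS will not pick tensor-op
// kernels that implicitly down-convert f32 operands on CUDA 10.x.
inline cublasStatus_t disallow_f32_tensor_op(cublasHandle_t handle) {
    return cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH);
}

In the patch itself the restriction is applied per heuristic query through the preference object rather than on the handle, so other dtype configurations keep their tensor-op algorithms.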
@@ -313,6 +313,19 @@ bool CUBLASLTMatmulDesc::get_algorithm_heuristic(const SizeArgs& args,
    cublas_check(cublasLtMatmulPreferenceSetAttribute(
            algo_pref, CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES, &algo_ws_limit,
            sizeof(algo_ws_limit)));
#if CUDA_VERSION < 11000
    bool is_f32_config = args.layout_a.dtype == dtype::Float32() &&
                         args.layout_b.dtype == dtype::Float32() &&
                         args.layout_c.dtype == dtype::Float32();
    if (is_f32_config) {
        // disable HMMA tensor op matmul when inputs and output are all f32
        // tensors, to avoid the potential accuracy loss
        uint32_t math_mode = CUBLAS_DEFAULT_MATH;
        cublas_check(cublasLtMatmulPreferenceSetAttribute(
                algo_pref, CUBLASLT_MATMUL_PREF_MATH_MODE_MASK, &math_mode,
                sizeof(math_mode)));
    }
#endif
    status = cublasLtMatmulAlgoGetHeuristic(
            cublasLt_handle, matmul_desc,
            dt_c == CUDA_R_32I ? layout_trans_b : layout_b,
......
@@ -215,6 +215,7 @@ std::vector<BenchArgs> get_feat_model_args() {
    return args;
}
#if CUDA_VERSION >= 10020
std::vector<BenchArgs> get_f16_feat_model_args() {
    std::vector<BenchArgs> args;
    args.emplace_back(BenchArgs{128, 9216, 9216});
@@ -222,6 +223,7 @@ std::vector<BenchArgs> get_f16_feat_model_args() {
    args.emplace_back(BenchArgs{128, 5184, 5184});
    return args;
}
#endif
void benchmark_matrix_mul(
        Handle* handle, const std::vector<BenchArgs>& args, DType A_dtype,
......
@@ -473,7 +473,34 @@ TEST_F(CUDA, MATRIX_MUL_CUBLASLT_INT8) {
        execs({A, B, {}});
    }
}

TEST_F(CUDA, MATRIX_MUL_CUBLASLT_F32) {
    require_compute_capability(7, 5);
    size_t m = 128, n = 1024, k = 18432;
    Checker<MatrixMul> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<MatrixMulForward>("CUBLAS_LT"));
    using Param = MatrixMul::Param;
    Param param;
    DType stype = dtype::Float32();
    DType dtype = dtype::Float32();
    TensorShape A, B;
    param.transposeA = param.transposeB = 0;
    if (param.transposeA)
        A = TensorShape{k, m};
    else
        A = TensorShape{m, k};
    if (param.transposeB)
        B = TensorShape{n, k};
    else
        B = TensorShape{k, n};
    checker.set_param(param)
            .set_dtype(0, stype)
            .set_dtype(1, stype)
            .set_dtype(2, dtype)
            .execs({A, B, {}});
}
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen