Commit 34773ba3 authored by Megvii Engine Team

fix(mgb/gopt): tensorcore pass replace BatchConvBias inputs to nchw4

GitOrigin-RevId: 3ff3c422fb81a1a40735ce612f29a9547886c547
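
In brief: EnableTensorCorePass relayouts a graph into NCHW32 so convolutions can use tensor cores, but the BatchConvBias implementation exercised here expects NCHW4. Registering replace_inps_to_nchw4 for BatchConvBias makes the pass convert that operator's inputs back to NCHW4 instead of leaving them in NCHW32; the new test below checks both that the format is preserved and that results match a graph built without the tensor-core conversion.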
Parent efe3cfe7
@@ -555,6 +555,7 @@ std::unique_ptr<EnableTensorCorePass> EnableTensorCorePass::
    replace_func[opr::Reshape::typeinfo()] = replace_inps_to_nchw4;
    replace_func[opr::GetVarShape::typeinfo()] = replace_inps_to_nchw4;
    replace_func[opr::Dimshuffle::typeinfo()] = replace_inps_to_nchw4;
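    // BatchConvBias runs in NCHW4 here, so route its inputs back to NCHW4
    // rather than leaving them in the pass's NCHW32 layout.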
    replace_func[opr::BatchConvBias::typeinfo()] = replace_inps_to_nchw4;
    return ret;
    MIDOUT_E
}
......
@@ -2244,6 +2244,67 @@ TEST(TestEnableTensorCore, Pooling) {
    MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
}

TEST(TestEnableTensorCore, BatchConvBias) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();
    auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
    auto sm_ver = prop.major * 10 + prop.minor;
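    // The kernels under test require compute capability 7.5 or newer;
    // skip the test on older devices.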
    if (sm_ver < 75) {
        printf("This testcase is ignored due to insufficient cuda cap(got: %d, "
               "expected: %d)\n",
               sm_ver, 75);
        return;
    }
    HostTensorGenerator<dtype::Int8> gen;
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkvar = [&](const char* name, const TensorShape& shp, const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name),
                dtype);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp, const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name),
                dtype);
    };
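    // All tensors are NCHW4-packed (groups of 4 channels folded into the
    // trailing dim): inp is N=32, C=96, H=W=24; flt is a per-sample 1x1
    // batched filter with 96 output channels. Following the usual
    // quantization convention, bias scale = input scale * filter scale.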
    auto inp = mkvar("inp", {32, 24, 24, 24, 4}, dtype::QuantizedS8(1.1f)),
         flt = mkcvar("flt", {32, 96, 24, 1, 1, 4}, dtype::QuantizedS8(1.2f)),
         bias = mkcvar("bias", {1, 24, 1, 1, 4}, dtype::QuantizedS32{1.1f * 1.2f});
    opr::BatchConvBias::Param param;
    param.format = opr::BatchConvBias::Param::Format::NCHW4;
    param.stride_h = param.stride_w = 1;
    param.pad_h = param.pad_w = 0;
    auto y = opr::BatchConvBias::make(
            inp, flt, bias, param, {}, OperatorNodeConfig{dtype::QuantizedS8{1.3f}});
    y = opr::TypeCvt::make(y, dtype::Float32());
    SymbolVar y_opt;
    SymbolVar y_no_tc;
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.enable_fuse_conv_bias_nonlinearity().enable_nchw32();
        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
    }
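    // Even with nchw32 enabled, BatchConvBias must keep its NCHW4 format:
    // the pass now relayouts this operator's inputs back to NCHW4.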
    ASSERT_EQ(
            opr::BatchConvBias::Param::Format::NCHW4,
            find_opr<opr::BatchConvBias>(y_opt).param().format);
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.enable_fuse_conv_bias_nonlinearity();
        unpack_vector(gopt::optimize_for_inference({y}, options), y_no_tc);
    }
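    // The tensor-core-optimized graph must produce the same result as the
    // graph compiled without the nchw32 conversion.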
    HostTensorND host_y, host_y_opt;
    auto func = graph->compile(
            {make_callback_copy(y_no_tc, host_y),
             make_callback_copy(y_opt, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
}

TEST(TestGoptInference, EnableTensorCore) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
......
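
For readers unfamiliar with the packed layouts involved, here is a minimal standalone sketch of the index arithmetic behind an NCHW32-to-NCHW4 relayout, i.e. what the relayout inserted in front of BatchConvBias conceptually computes. This is not MegEngine code; the function name and signature are hypothetical, and it assumes C is a multiple of 32.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Convert an int8 tensor from NCHW32 packing, {N, C/32, H, W, 32},
    // back to NCHW4 packing, {N, C/4, H, W, 4}. Element (n, c, h, w) is
    // copied from its NCHW32 offset to its NCHW4 offset.
    std::vector<int8_t> nchw32_to_nchw4(const std::vector<int8_t>& src,
                                        size_t N, size_t C, size_t H, size_t W) {
        std::vector<int8_t> dst(src.size());
        for (size_t n = 0; n < N; ++n)
            for (size_t c = 0; c < C; ++c)
                for (size_t h = 0; h < H; ++h)
                    for (size_t w = 0; w < W; ++w) {
                        // source offset in {N, C/32, H, W, 32}
                        size_t s = ((((n * (C / 32) + c / 32) * H + h) * W + w) * 32) +
                                   c % 32;
                        // destination offset in {N, C/4, H, W, 4}
                        size_t d = ((((n * (C / 4) + c / 4) * H + h) * W + w) * 4) +
                                   c % 4;
                        dst[d] = src[s];
                    }
        return dst;
    }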