提交 3eb0505f 编写于 作者: M Megvii Engine Team

feat(imperative): add support for quantized conv transpose2d

GitOrigin-RevId: ffd6431299b2ae008fbdd1eed6458437e6b6a45f
上级 a8309889
......@@ -67,7 +67,6 @@ option(MGE_WITH_ROCM "Enable ROCM support" OFF)
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
if(MSVC OR WIN32)
message(STATUS "windows force cudnn static link")
set(MGE_WITH_CUDNN_SHARED OFF)
......@@ -332,7 +331,6 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
if(NOT MGE_WITH_JIT)
if(MGE_WITH_HALIDE)
message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
......@@ -728,7 +726,6 @@ if (MGE_WITH_ROCM)
include(cmake/rocm.cmake)
endif ()
if(MGE_WITH_ATLAS)
add_subdirectory(dnn/atlas-stub)
list(APPEND MGE_ATLAS_LIBS atlas-stub)
......@@ -736,7 +733,6 @@ if(MGE_WITH_ATLAS)
set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
......@@ -834,12 +830,10 @@ endif()
set(MGB_CUDA ${MGE_WITH_CUDA})
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
#ROCM
set(MGB_ROCM ${MGE_WITH_ROCM})
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
# CAMBRICON
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
......@@ -1029,7 +1023,6 @@ if(MGE_BUILD_SDK)
add_subdirectory(sdk/load-and-run)
endif()
if(MGE_BUILD_IMPERATIVE_RT)
add_subdirectory(imperative)
message(STATUS "Enable imperative python wrapper runtime")
......@@ -1117,4 +1110,3 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER
message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
endif()
include(ExternalProject)
find_package(LLVM 6.0 REQUIRED CONFIG)
......
......@@ -38,7 +38,6 @@ list(APPEND OPR_PARAM_DEFS_OUTS
)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h")
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS})
......@@ -56,7 +55,6 @@ endforeach()
add_dependencies(opr_param_defs _opr_param_defs)
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
if(MGE_WITH_CUDA)
add_library(cutlass INTERFACE)
target_include_directories(cutlass
......
......@@ -13,7 +13,6 @@
#if !defined(__CUDACC__) && !defined(__HIPCC__)
#endif // !defined(__CUDACC__)
// vim: syntax=cpp.doxygen
......@@ -90,7 +90,6 @@ class Handle {
std::unique_ptr<opr> create_rocm_operator();
#endif
virtual ~Handle();
/*!
......
......@@ -137,11 +137,9 @@ if(MGE_WITH_CUDA)
gen_cutlass_kimpl(conv2d tensorop8832)
file(GLOB_RECURSE CUTLASS_SOURCES ${CUTLASS_GEN_DIR}/*.cu)
list(APPEND SOURCES ${CUTLASS_SOURCES})
list(APPEND SOURCES ${CUSOURCES})
endif()
if(MGE_WITH_CAMBRICON)
file(GLOB_RECURSE SOURCES_ cambricon/*.cpp)
list(APPEND SOURCES ${SOURCES_})
......@@ -161,7 +159,6 @@ if(MGE_WITH_ATLAS)
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1)
endif()
add_definitions(${LIBMEGDNN_DEF})
add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES})
......@@ -186,7 +183,6 @@ if(MGE_WITH_ROCM)
${AMDOCL_LIBRARY_DIR})
endif()
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64")
if(MGE_ENABLE_CPUINFO)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>)
......
......@@ -15,5 +15,4 @@
#pragma message "Mangling is disabled."
#endif // MEGDNN_ENABLE_MANGLING
// vim: syntax=cpp.doxygen
......@@ -31,13 +31,10 @@
#include "src/aarch64/handle.h"
#endif
#if MEGDNN_WITH_CUDA
#include "src/cuda/handle.h"
#endif
#if MEGDNN_WITH_CAMBRICON
#include "src/cambricon/handle.h"
#endif
......@@ -128,7 +125,6 @@ std::unique_ptr<Handle> Handle::make(megcoreComputingHandle_t computing_handle,
return nullptr;
}
void Handle::set_destructor(const thin_function<void()>& d) {
megdnn_assert(!m_destructor, "destructor can be set only once");
m_destructor = d;
......
......@@ -17,8 +17,6 @@
#include "src/cuda/megcore/cuda_computing_context.hpp"
#endif
#if MEGDNN_WITH_ROCM
#include "src/rocm/megcore/computing_context.hpp"
#endif
......
......@@ -880,7 +880,6 @@ void remap(const Mat<T>& src, Mat<T>& dst, Mat<short>& map1, Mat<ushort>& map2,
for (; x1 <= bcols - 8; x1 += 8)
vst1q_u16(A + x1,
vandq_u16(vld1q_u16(sA + x1), v_scale));
#endif
for (; x1 < bcols; ++x1)
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2 - 1));
......
......@@ -287,7 +287,6 @@ void ConvBiasForwardImpl::AlgoPack::fill_dp4a_algos() {
int8_nchw4_dotprod.emplace_back(AlgoParam{16, 64, 8, 16, 64, 8, 2});
}
ConvBiasForwardImpl::AlgoBase*
ConvBiasForwardImpl::AlgoPack::cudnn_conv_from_enum(
cudnnConvolutionFwdAlgo_t algo) {
......
......@@ -1037,7 +1037,6 @@ private:
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
};
class ConvBiasForwardImpl::AlgoPack : NonCopyableObj {
private:
AlgoBase::Mapper m_all_algos_map;
......
......@@ -10,7 +10,6 @@
*/
#include "src/common/utils.h"
namespace {
template <bool is_xcorr, typename dtype>
......
......@@ -34,7 +34,6 @@ if(MGE_WITH_CAMBRICON)
list(APPEND SOURCES ${SOURCES_})
endif()
if(MGE_WITH_ATLAS)
file(GLOB_RECURSE SOURCES_ atlas/*.cpp)
list(APPEND SOURCES ${SOURCES_})
......@@ -45,8 +44,6 @@ if (MGE_WITH_ROCM)
list (APPEND SOURCES ${SOURCES_})
endif()
add_executable(megdnn_test ${SOURCES})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
target_link_libraries(megdnn_test gtest)
......@@ -60,7 +57,6 @@ if(MGE_WITH_ATLAS)
target_link_libraries(megdnn_test atlas-stub)
endif()
target_include_directories(megdnn_test
PRIVATE
${PROJECT_SOURCE_DIR}/third_party/midout/src
......
......@@ -494,7 +494,6 @@ std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
return args;
}
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
std::vector<TestArg> args;
param::ConvBias cur_param;
......@@ -530,7 +529,6 @@ std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
return args;
}
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
std::vector<TestArg> args;
param::ConvBias cur_param;
......@@ -974,7 +972,6 @@ void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
}
#endif // MEGDNN_WITH_BENCHMARK
std::vector<conv_bias::TestArg> get_conv_bias_args(
std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
......@@ -1188,7 +1185,6 @@ void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args,
}
}
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle,
RNG* rng, float epsilon, DType type0, DType type1,
DType type2, DType type3, const char* algo_name) {
......
......@@ -93,7 +93,6 @@ void check_conv_bias(std::vector<megdnn::test::conv_bias::TestArg> args,
void checker_conv_bias_int8x8x16(
std::vector<megdnn::test::conv_bias::TestArg> args,
megdnn::Handle* handle, const char* algo_name);
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args,
Handle* handle, RNG* rng, float epsilon,
DType type0, DType type1, DType type2,
......
......@@ -1145,7 +1145,6 @@ TEST(SmallVectorTest, SwapMoveOnly) {
}
}
}
} // anonymous namespace
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -40,7 +40,6 @@ TensorLayout make_layout(std::initializer_list<size_t> shape,
}
} // anonymous namespace
#if MEGDNN_64_BIT
TEST(BASIC_TYPES, TOTAL_NR_ELEMS) {
TensorShape shp{1u<<31, 1u<<31};
......@@ -340,5 +339,4 @@ TEST(BASIC_TYPES, TENSOR_LAYOUT_FMT_LOW_BITS_VALID) {
LowbitsAlignedToBytesTensorFormat::make(4_z)),
MegDNNError);
}
// vim: syntax=cpp.doxygen
......@@ -697,7 +697,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
conv_bias::get_int8_chwn4_args_small_batch(1));
}
TEST_F(CUDA, FALLBACK_CONV_QS8) {
require_compute_capability_eq(7, 5);
Checker<ConvBiasForward> checker(handle_cuda());
......@@ -1100,7 +1099,6 @@ TEST_F(CUDA, BENCHMARK_CONV_BIAS_INT8_NCHW4_NCHW) {
run({{16, 16, 46, 80, 4}, {32, 16, 3, 3, 4}, {1, 32, 1, 1}});
}
#if CUDA_VERSION >= 10020
TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW32) {
require_compute_capability(7, 5);
......
......@@ -32,7 +32,6 @@ TYPED_TEST(CUDA_ELEMWISE_MULTI_TYPE, run) {
elemwise_multi_type::run_test<TypeParam>(this->handle_cuda());
}
using Mode = ElemwiseMultiType::Param::Mode;
static void run_test(int arity, Checker<ElemwiseMultiType>& checker, Mode mode) {
for (auto type : std::vector<std::pair<DType, DType>>{
......
......@@ -22,7 +22,6 @@
using namespace megdnn;
using namespace test;
TEST_F(CUDA, SLEEP) {
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>();
......@@ -53,6 +52,5 @@ TEST_F(CUDA, SLEEP) {
}
// vim: syntax=cpp.doxygen
......@@ -75,7 +75,6 @@ TEST_F(FALLBACK, CONV_BIAS_FORWARD) {
.execs({src_shape, filter_shape, bias_shape, {}, {}})
.execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
}
}
std::vector<conv_bias::TestArg> get_conv_bias_args(
......@@ -236,7 +235,6 @@ TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_QUANTIZED) {
"FALLBACK_NAIVE");
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_CONVBIAS) {
constexpr size_t RUNS = 10;
......
......@@ -139,3 +139,52 @@ def batch_conv_bias_activation(
)
(outputs,) = apply(op, inp, weight, bias)
return outputs
def conv_transpose2d(
    inp: Tensor,
    weight: Tensor,
    bias: Tensor = None,
    dtype=None,
    stride: Union[int, Tuple[int, int]] = 1,
    padding: Union[int, Tuple[int, int]] = 0,
    dilation: Union[int, Tuple[int, int]] = 1,
    groups: int = 1,
    conv_mode="cross_correlation",
    compute_mode="default",
) -> Tensor:
    r"""Quantized 2D transposed convolution.

    Builds a ``ConvolutionBackwardData`` op from the given hyper-parameters and
    applies it to ``weight`` and ``inp`` (in that order).

    :param inp: input tensor.
    :param weight: convolution kernel.
    :param bias: must be ``None``; a bias is not supported yet.
    :param dtype: forwarded to the op as its ``dtype`` attribute.
    :param stride: stride, a single int or a ``(h, w)`` pair. Default: 1
    :param padding: padding, a single int or a ``(h, w)`` pair. Default: 0
    :param dilation: dilation, a single int or a ``(h, w)`` pair. Default: 1
    :param groups: must be 1; grouped transposed convolution is not supported yet.
    :param conv_mode: only cross correlation is accepted, given either as the
        string ``"cross_correlation"`` (case-insensitive) or as an enum-like
        object whose ``name`` is ``"CROSS_CORRELATION"``.
    :param compute_mode: only the default mode is accepted, given either as the
        string ``"default"`` (case-insensitive) or as an enum-like object whose
        ``name`` is ``"DEFAULT"``.
    :return: the single output produced by the op.
    :raises AssertionError: if ``conv_mode`` or ``compute_mode`` is unsupported.
    :raises NotImplementedError: if ``groups != 1`` or ``bias`` is not ``None``.
    """

    def _matches(mode, expected: str) -> bool:
        # The previous check called ``mode.lower()`` unconditionally, so an enum
        # argument raised AttributeError before ``.name`` was ever looked at,
        # and a wrong string raised AttributeError on ``.name``. Dispatch on the
        # argument type so both spellings are handled and a mismatch simply
        # fails the assertion.
        if isinstance(mode, str):
            return mode.lower() == expected
        return getattr(mode, "name", None) == expected.upper()

    assert _matches(
        conv_mode, "cross_correlation"
    ), "unsupported conv_mode: {!r}".format(conv_mode)
    assert _matches(compute_mode, "default"), "unsupported compute_mode: {!r}".format(
        compute_mode
    )

    if groups != 1:
        raise NotImplementedError(
            "group quantized transposed conv2d is not supported yet."
        )
    if bias is not None:
        raise NotImplementedError(
            "bias of quantized transposed conv2d is not supported yet."
        )

    pad_h, pad_w = _pair(padding)
    stride_h, stride_w = _pair_nonzero(stride)
    dilate_h, dilate_w = _pair_nonzero(dilation)

    # should be replaced by Op with bias such as ConvolutionBackwardDataBias
    op = builtin.ConvolutionBackwardData(
        stride_h=stride_h,
        stride_w=stride_w,
        pad_h=pad_h,
        pad_w=pad_w,
        dilate_h=dilate_h,
        dilate_w=dilate_w,
        strategy=get_execution_strategy(),
        dtype=dtype,
        compute_mode=compute_mode,
        mode=conv_mode,
    )
    # The kernel is the first operand of the op, the input the second.
    (output,) = apply(op, weight, inp)
    return output
......@@ -651,11 +651,11 @@ class ConvTranspose2d(_ConvNd):
# Assume format is NCHW
return (1, self.out_channels, 1, 1)
def forward(self, inp):
def calc_conv_transpose2d(self, inp, weight, bias):
return conv_transpose2d(
inp,
self.weight,
self.bias,
weight,
bias,
self.stride,
self.padding,
self.dilation,
......@@ -664,6 +664,9 @@ class ConvTranspose2d(_ConvNd):
self.compute_mode,
)
def forward(self, inp):
return self.calc_conv_transpose2d(inp, self.weight, self.bias)
class LocalConv2d(Conv2d):
r"""
......
......@@ -7,7 +7,7 @@
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .batch_matmul_activation import BatchMatMulActivation
from .concat import Concat
from .conv import Conv2d, ConvRelu2d
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d
from .conv_bn import ConvBn2d, ConvBnRelu2d
from .elemwise import Elemwise
from .linear import Linear
......
......@@ -57,3 +57,42 @@ class ConvRelu2d(Conv2d):
def forward(self, inp):
return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp)))
class ConvTranspose2d(Float.ConvTranspose2d, QATModule):
    r"""
    QAT counterpart of :class:`~.module.ConvTranspose2d`, implemented as a
    :class:`~.QATModule`.
    Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
    """

    def calc_conv_transpose2d_qat(self, inp):
        r"""
        Run the transposed convolution on ``inp`` using the weight and bias
        returned by ``apply_quant_weight`` / ``apply_quant_bias``.
        """
        quant_weight = self.apply_quant_weight(self.weight)
        quant_bias = self.apply_quant_bias(self.bias, inp, quant_weight)
        return self.calc_conv_transpose2d(inp, quant_weight, quant_bias)

    @classmethod
    def from_float_module(cls, float_module: Float.ConvTranspose2d):
        r"""
        Return a :class:`~.QATModule` instance converted from
        a float :class:`~.Module` instance.
        """
        # Positional constructor arguments, mirrored one-to-one from the
        # float module; the bias flag is derived from whether a bias exists.
        ctor_args = (
            float_module.in_channels,
            float_module.out_channels,
            float_module.kernel_size,
            float_module.stride,
            float_module.padding,
            float_module.dilation,
            float_module.groups,
            float_module.bias is not None,
            float_module.conv_mode,
            float_module.compute_mode,
        )
        qat_module = cls(*ctor_args, name=float_module.name)
        # The parameters are assigned from the float module, not copied.
        qat_module.weight = float_module.weight
        qat_module.bias = float_module.bias
        return qat_module

    def forward(self, inp):
        conv_out = self.calc_conv_transpose2d_qat(inp)
        return self.apply_quant_activation(conv_out)
......@@ -7,7 +7,7 @@
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .batch_matmul_activation import BatchMatMulActivation
from .concat import Concat
from .conv import Conv2d, ConvRelu2d
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d
from .conv_bn import ConvBn2d, ConvBnRelu2d
from .elemwise import Elemwise
from .linear import Linear
......
......@@ -12,6 +12,7 @@ import numpy as np
from ... import module as Float
from ...core.tensor import dtype
from ...functional.nn import conv_bias_activation
from ...functional.quantized import conv_transpose2d
from ...tensor import Parameter
from ..qat import conv as QAT
from .module import QuantizedModule
......@@ -108,3 +109,98 @@ class ConvRelu2d(Conv2d):
def forward(self, inp):
return self.calc_conv_quantized(inp, nonlinear_mode="relu")
class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule):
    r"""Quantized version of :class:`~.qat.ConvTranspose2d`.
    Applies a 2D transposed convolution over a quantized input tensor, used
    for inference only.

    The parameters are the same as for :class:`~.module.ConvTranspose2d`,
    except for the additional ``dtype``.

    :param dtype: data type of the output, should be qint8.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        dilation: Union[int, Tuple[int, int]] = 1,
        groups: int = 1,
        bias: bool = True,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
        dtype=None,
        **kwargs
    ):
        # NOTE(review): ``**kwargs`` (e.g. the ``name`` passed by
        # ``from_qat_module``) is accepted but never forwarded to the parent
        # constructor -- confirm that dropping it is intended.
        float_ctor_kwargs = dict(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
        )
        super().__init__(**float_ctor_kwargs)
        # Requested output data type, handed to the functional op in forward.
        self.output_dtype = dtype

    @classmethod
    def from_qat_module(cls, qat_module: QAT.ConvTranspose2d):
        r"""
        Return a :class:`~.QuantizedModule` instance converted from a
        :class:`~.QATModule` instance.
        """
        act_dtype = qat_module.get_activation_dtype()
        quantized = cls(
            qat_module.in_channels,
            qat_module.out_channels,
            qat_module.kernel_size,
            qat_module.stride,
            qat_module.padding,
            qat_module.dilation,
            qat_module.groups,
            qat_module.bias is not None,
            qat_module.conv_mode,
            qat_module.compute_mode,
            dtype=act_dtype,
            name=qat_module.name,
        )

        # The weight is cast to the QAT module's weight dtype and stored as a
        # new Parameter that keeps the original parameter name.
        cast_weight = qat_module.weight.astype(qat_module.get_weight_dtype())
        quantized.weight = Parameter(cast_weight.numpy(), name=qat_module.weight.name)

        # The bias (if any) is carried over without a dtype cast.
        if qat_module.bias is None:
            quantized.bias = None
        else:
            quantized.bias = Parameter(
                qat_module.bias.numpy(), name=qat_module.bias.name
            )
        return quantized

    def calc_conv_transpose2d_quantized(self, inp):
        r"""
        Call the quantized functional ``conv_transpose2d`` with this module's
        weight, optional bias and hyper-parameters.
        """
        qint32_bias = None
        if self.bias is not None:
            # The bias is re-typed to qint32 whose scale is the product of the
            # input scale and the weight scale.
            # NOTE(review): the functional op added in this commit raises
            # NotImplementedError for a non-None bias, so this branch
            # presumably cannot succeed yet -- confirm.
            bias_scale = dtype.get_scale(inp.dtype) * dtype.get_scale(
                self.weight.dtype
            )
            qint32_bias = self.bias.astype(dtype.qint32(bias_scale))

        return conv_transpose2d(
            inp=inp,
            weight=self.weight,
            bias=qint32_bias,
            dtype=self.output_dtype,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            conv_mode=self.conv_mode,
            compute_mode=self.compute_mode,
        )

    def forward(self, inp):
        return self.calc_conv_transpose2d_quantized(inp)
......@@ -13,5 +13,3 @@ from .fake_quant import _FakeQuantize
from .observer import MinMaxObserver
from .qconfig import QConfig
from .utils import QParams
......@@ -69,7 +69,6 @@ class PersistentCacheOnServer(_PersistentCache):
def make_user_prefix(cls):
return "mgbcache:{}".format(getpass.getuser())
def _make_key(self, category, key):
prefix_with_version = "{}:MGB{}".format(self._prefix, __version__)
return b"@".join(
......@@ -86,5 +85,3 @@ class PersistentCacheOnServer(_PersistentCache):
key = self._make_key(category, key)
self._prev_get_refkeep = conn.get(key)
return self._prev_get_refkeep
......@@ -38,7 +38,6 @@ class build_ext(_build_ext):
modpath = str(pathlib.Path(*modpath).resolve())
copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run)
package_name = 'MegEngine'
v = {}
......@@ -79,7 +78,6 @@ megengine_data += [
for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*')
]
with open('requires.txt') as f:
requires = f.read().splitlines()
with open('requires-style.txt') as f:
......@@ -108,8 +106,6 @@ setup_kwargs = dict(
cmdclass={'build_ext': build_ext},
scripts = ['./megengine/tools/mge'],
)
setup_kwargs.update(dict(
classifiers=[
'Development Status :: 3 - Alpha',
......
......@@ -876,8 +876,6 @@ def test_nms_is_same():
assert op3 != op4
def test_argmxx_on_inf():
def run_argmax():
x = F.zeros((100, 100))
......
......@@ -13,6 +13,7 @@ from megengine.module import (
Conv2d,
ConvBn2d,
ConvRelu2d,
ConvTranspose2d,
DequantStub,
Module,
QuantStub,
......@@ -202,3 +203,40 @@ def test_quantize_batchmatmul_activation():
infer_cg = cgtools.GraphInference(file)[0]
dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0]
np.testing.assert_allclose(quantize_outputs.numpy(), dumped_outputs, atol=1e-6)
def test_qat_conv_transpose2d():
    """With fake quantization disabled, the QAT-converted network must give the
    same outputs as the float network, in both train and eval mode and with
    and without a bias."""
    in_channels, out_channels, kernel_size = 32, 64, 3

    class TestNet(Module):
        def __init__(self, bias):
            super().__init__()
            self.quant = QuantStub()
            self.dequant = DequantStub()
            self.conv = ConvTranspose2d(
                in_channels, out_channels, kernel_size, bias=bias
            )

        def forward(self, inp):
            # quant stub -> transposed conv -> dequant stub
            return self.dequant(self.conv(self.quant(inp)))

    inputs = tensor(np.random.randn(4, in_channels, 32, 32).astype(np.float32))
    for bias in (True, False):
        net = TestNet(bias)

        # training mode
        net.train()
        qat_net = quantize_qat(net, inplace=False)
        disable_fake_quant(qat_net)
        float_out = net(inputs)
        qat_out = qat_net(inputs)
        np.testing.assert_allclose(float_out.numpy(), qat_out.numpy())

        # evaluation mode
        net.eval()
        float_out = net(inputs)
        qat_net.eval()
        qat_out = qat_net(inputs)
        np.testing.assert_allclose(float_out.numpy(), qat_out.numpy())
......@@ -92,8 +92,6 @@ def test_tqt():
np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5)
def _save_to(self, name="grad"):
def callback(grad):
setattr(self, name, grad)
......
......@@ -14,6 +14,7 @@ import megengine.functional as F
from megengine.core.tensor import dtype
from megengine.device import get_device_count
from megengine.functional.elemwise import _elemwise_multi_type, _elwise
from megengine.module.quantized.conv import ConvTranspose2d
from megengine.quantization import QuantMode, create_qparams
......@@ -168,3 +169,94 @@ def test_conv_bias():
run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu")
run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")
def test_conv_transpose2d():
    """Compare the quantized ConvTranspose2d module against the float
    ``F.conv_transpose2d`` run on the dequantized inputs."""
    rng = np.random.RandomState(seed=2021)

    def test_func(
        N,
        IC,
        IH,
        IW,
        OC,
        KH,
        KW,
        SH,
        SW,
        PH,
        PW,
        DH,
        DW,
        groups=1,
        has_bias=True,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
    ):
        def draw_scale():
            return np.float32(rng.uniform(low=0.04, high=0.06))

        def draw_fp32(shape):
            return rng.uniform(low=-1, high=1, size=shape).astype(np.float32)

        # The draw order below (scales first, then data) fixes the random
        # stream and must not be changed.
        inp_scale = draw_scale()
        weight_scale = draw_scale()
        bias_scale = inp_scale * weight_scale
        out_scale = draw_scale()

        inp_dtype = dtype.qint8(inp_scale)
        weight_dtype = dtype.qint8(weight_scale)
        bias_dtype = dtype.qint32(bias_scale)
        out_dtype = dtype.qint8(out_scale)

        raw_inp = draw_fp32((N, IC, IH, IW))
        raw_weight = draw_fp32((IC, OC, KH, KW))
        raw_bias = draw_fp32((1, OC, 1, 1))

        # Quantize the random data and wrap it in tensors / parameters.
        inp_int8 = mge.tensor(dtype.convert_to_qint8(raw_inp, inp_dtype), dtype=inp_dtype)
        weight_int8 = mge.Parameter(
            dtype.convert_to_qint8(raw_weight, weight_dtype), dtype=weight_dtype
        )
        bias_int32 = mge.Parameter(
            dtype.convert_to_qint32(raw_bias, bias_dtype), dtype=bias_dtype
        )

        # Float reference operands are the dequantized quantized values.
        inp_fp32 = inp_int8.astype("float32")
        weight_fp32 = weight_int8.astype("float32")
        bias_fp32 = bias_int32.astype("float32")

        reference = F.conv_transpose2d(
            inp_fp32,
            weight_fp32,
            bias_fp32 if has_bias else None,
            stride=(SH, SW),
            padding=(PH, PW),
            dilation=(DH, DW),
            groups=groups,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
        )
        # Round-trip the reference through the output qint8 dtype.
        expected = dtype.convert_from_qint8(
            dtype.convert_to_qint8(reference.numpy(), out_dtype)
        )

        module = ConvTranspose2d(
            in_channels=IC,
            out_channels=OC,
            kernel_size=(KH, KW),
            stride=(SH, SW),
            padding=(PH, PW),
            dilation=(DH, DW),
            groups=groups,
            bias=has_bias,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
            dtype=out_dtype,
        )
        module.weight = mge.Parameter(weight_int8)
        if has_bias:
            module.bias = mge.Parameter(bias_int32)

        result = dtype.convert_from_qint8(module.forward(inp_int8).numpy())
        np.testing.assert_allclose(result, expected, atol=out_scale)

    # (N, IC, IH, IW, OC, KH, KW, SH, SW, PH, PW, DH, DW, groups, has_bias)
    cases = [
        (1, 4, 1, 1, 4, 1, 1, 1, 1, 0, 0, 1, 1, 1, False),
        (2, 4, 3, 1, 8, 1, 1, 1, 1, 0, 0, 1, 1, 1, False),
        (4, 4, 16, 16, 8, 3, 3, 1, 1, 1, 1, 1, 1, 1, False),
        (32, 64, 36, 28, 16, 3, 2, 1, 3, 1, 0, 1, 1, 1, False),
    ]
    for case in cases:
        test_func(*case)
......@@ -486,8 +486,6 @@ def test_topk():
check_pygraph_dump(fwd, [x], [top, indices])
def test_random():
@trace(symbolic=True, capture_as_const=True)
def fwd():
......@@ -723,8 +721,6 @@ def test_elemwise_multitype():
check_pygraph_dump(fwd, [x, y], [result])
def test_cvtcolor():
inp = np.random.randn(3, 3, 3, 3).astype(np.float32)
x = Tensor(inp)
......
......@@ -7,4 +7,3 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
__version__ = "1.6.0.dev"
......@@ -43,6 +43,11 @@ auto apply_on_var_node(
const VarNodeArray& inputs) {
auto&& conv = static_cast<const ConvolutionBackwardData&>(def);
OperatorNodeConfig config{conv.make_name()};
DType output_dtype = conv.dtype;
if (output_dtype.valid()) {
config.output_dtype(output_dtype);
}
if (inputs.size() == 2) {
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
} else {
......
......@@ -192,7 +192,6 @@ function do_build() {
#handle dlopen path
install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so
#copy megbrain_export lib
DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/
rm -rf ${DEPEND_LIB}
......@@ -209,7 +208,6 @@ function do_build() {
echo "comapt whl name: ${compat_whl_name}"
cp ${BUILD_DIR}/staging/dist/Meg*.whl ${MACOS_WHL_HOME}/${compat_whl_name}
cd ${SRC_DIR}
echo ""
echo "##############################################################################################"
......@@ -220,12 +218,10 @@ function do_build() {
done
}
function third_party_prepare() {
echo "init third_party..."
${SRC_DIR}/third_party/prepare.sh
if [[ -z ${ALREADY_INSTALL_MKL} ]]
then
echo "init third_party..."
......
......@@ -55,13 +55,11 @@ function patch_elf_depend_lib_mgb_mge() {
patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_export.so
handle_strip ${LIBS_DIR}/libmegengine_export.so
# as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel
# TODO: PR for auditwheel to support args for dlopen libs
handle_copy_cuda_libs ${LIBS_DIR}
}
SRC_DIR=$(readlink -f "`dirname $0`/../../../")
source ${SRC_DIR}/scripts/whl/utils/utils.sh
......@@ -142,7 +140,6 @@ do
mkdir -p staging
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/core
mkdir -p lib/ucx
patch_elf_depend_lib_mgb_mge
......@@ -158,7 +155,6 @@ do
echo "comapt whl name: ${compat_whl_name}"
mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name}
cd /home/output
chown -R ${UID}.${UID} .
# compat for root-less docker env to remove output at host side
......
......@@ -70,7 +70,6 @@ then
BUILD_WHL_CPU_ONLY="OFF"
fi
# config NVIDIA libs
TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll"
CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll"
......@@ -102,14 +101,11 @@ function copy_more_dll() {
# empty.file to trigger setup.py to create a null empty
echo "empty" > ${CP_WHL_DST_IMP}/empty.file
if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then
echo "copy nvidia lib to whl use...."
depend_real_copy ${CP_WHL_DST_IMP}
fi
}
BUILD_DIR=${SRC_DIR}/build_dir/host/build/
# here we just treat cu file should not in the increment build file list
......@@ -194,14 +190,12 @@ function do_build() {
llvm-strip -s ${rt_file}
mv ${rt_file} _imperative_rt.pyd
copy_more_dll
cd ${BUILD_DIR}/staging
echo "call setup.py now"
${PYTHON_DIR}/python3 setup.py bdist_wheel
cp ${BUILD_DIR}/staging/dist/Meg*.whl ${WINDOWS_WHL_HOME}/
echo ""
echo "##############################################################################################"
echo "windows whl package location: ${WINDOWS_WHL_HOME}"
......@@ -215,7 +209,6 @@ function third_party_prepare() {
echo "init third_party..."
${SRC_DIR}/third_party/prepare.sh
if [[ -z ${ALREADY_INSTALL_MKL} ]]
then
echo "init third_party..."
......
......@@ -35,8 +35,6 @@
#include "megcore_atlas.h"
#endif
using namespace mgb;
/* =================== MegDNNHandle =================== */
......@@ -102,7 +100,6 @@ MegDNNHandle::MegDNNHandle(const CompNodeEnv& env) {
}
#endif
if (env.property().type == CompNode::DeviceType::CPU) {
megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU);
megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl,
......@@ -234,7 +231,6 @@ void CompNodeEnv::init_cuda_async(int dev, CompNode comp_node,
}
#endif
#if MGB_ATLAS
void mgb::_on_atlas_error(const char* expr, int err, const char* file,
......@@ -258,8 +254,6 @@ void CompNodeEnv::init_atlas(CompNode comp_node, const AtlasEnv& env) {
}
#endif
#if MGB_ROCM
void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file,
......@@ -381,7 +375,6 @@ void CompNodeEnv::init_cpu(const CpuEnv& env, CompNode comp_node) {
MegDNNHandle::get(*this).handle()->alignment_requirement();
}
#if MGB_CAMBRICON
void CompNodeEnv::init_cnrt(int dev, CompNode comp_node,
const ContinuationCtx<cnrtQueue_t>& cont) {
......@@ -446,7 +439,6 @@ void CompNodeEnv::fini() {
MGB_ATLAS_CHECK(aclrtDestroyStream(m_atlas_env.stream));
}
#endif
}
#if MGB_ENABLE_COMP_NODE_ASYNC_INIT
......
......@@ -73,14 +73,11 @@ std::string CudaError::get_cuda_extra_info() {
#endif
}
AtlasError::AtlasError(const std::string &msg):
SystemError(msg)
{
}
ROCmError::ROCmError(const std::string &msg):
SystemError(msg)
{
......
......@@ -23,7 +23,6 @@
#include "megbrain/graph/helper.h"
#include "megbrain/opr/utility.h"
#if MGB_ENABLE_TENSOR_RT
#include "megbrain/tensorrt/opr_replace.h"
#endif
......@@ -554,7 +553,6 @@ ComputingGraphImpl::CompileState ComputingGraphImpl::compile_prepare(
}
#endif
#if MGB_JIT
if (std::abs(options().graph_opt_level) == 0 &&
(options().graph_opt.jit || options().graph_opt.jit_config.enabled())) {
......
......@@ -445,7 +445,6 @@ class VarNodeMemManager {
SyncableCounter m_cpu_async_release_barrier;
#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM
//! release dynamic var on after compnode event finishes
class AsyncVarReleaser;
......
......@@ -508,7 +508,6 @@ class CompNode {
*/
static bool enable_affinity_for_cpu(bool flag);
protected:
//! ImplBase with env(); defined in CompNodeEnv
class Impl;
......
......@@ -19,8 +19,6 @@
#include "megdnn/handle.h"
#if MGB_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
......@@ -90,8 +88,6 @@
#endif // MGB_ATLAS
#if MGB_ROCM
#include "hcc_detail/hcc_defs_prologue.h"
#include "megcore_rocm.h"
......@@ -196,7 +192,6 @@ namespace mgb {
const char* file, const char* func, int line);
#endif
#if MGB_CUDA
[[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err,
const char* file, const char* func, int line);
......@@ -205,7 +200,6 @@ namespace mgb {
int line);
#endif
#if MGB_ROCM
[[noreturn]] void _on_hip_error(const char* expr, hipError_t err,
const char* file, const char* func, int line);
......@@ -232,7 +226,6 @@ public:
mgb_assert(0, "The CompNode set_affinity is not implement");
}
};
using AtlasDispatcher = CPUDispatcher;
/*!
......@@ -328,7 +321,6 @@ public:
}
#endif
}
/*!
......@@ -370,7 +362,6 @@ public:
const ContinuationCtx<cudaStream_t>& cont);
#endif
#if MGB_ATLAS
struct AtlasEnv {
int device = -1;
......@@ -431,8 +422,6 @@ public:
void init_atlas(CompNode comp_node, const AtlasEnv& env);
#endif
#if MGB_ROCM
struct ROCmEnv {
int device = -1;
......@@ -547,7 +536,6 @@ private:
CompNode m_comp_node;
Property m_property;
MemEventHandler m_mem_event_handler;
#if MGB_CUDA
CudaEnv m_cuda_env;
#endif
......
......@@ -71,7 +71,6 @@
}) \
do { \
} while (0)
namespace mgb {
//! the most general MegBrain exception type; also base class for all megbrain
......@@ -149,7 +148,6 @@ public:
AtlasError(const std::string& msg);
};
class ROCmError final : public SystemError {
public:
/*!
......@@ -224,7 +222,6 @@ public:
using MegBrainError::MegBrainError;
};
} // namespace mgb
namespace mgb {
......@@ -233,5 +230,4 @@ bool has_uncaught_exception();
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -49,7 +49,11 @@ def SVD: MgbHashableOp<"SVD", [SVDParam]>;
def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]> {
let extraArguments = (ins
MgbDTypeAttr:$dtype
);
}
def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>;
......
......@@ -40,7 +40,6 @@ TEST(TestCompNode, Parse) {
ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23));
ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23));
ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0));
ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0));
ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3));
......@@ -62,7 +61,6 @@ TEST(TestCompNode, Parse) {
ASSERT_EQ(L::parse("multithread:default:2"),
make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2));
ASSERT_THROW(L::parse("apu"), MegBrainError);
ASSERT_THROW(L::parse("fpgbx"), MegBrainError);
ASSERT_THROW(L::parse("cab0"), MegBrainError);
......@@ -165,8 +163,6 @@ TEST(TestCompNode, Load) {
auto atlas1 = CompNode::load("atlas1");
ASSERT_NE(atlas0, atlas1);
#endif
}
TEST(TestCompNode, FreeAfterFinalize) {
......@@ -355,7 +351,6 @@ TEST(TestCompNodeAtlas, MemNode) {
}
#endif
TEST(TestCompNodeCPU, PhysicalDispatch) {
constexpr int ID = 0x2a6453e0;
using L = CompNode::Locator;
......@@ -754,7 +749,6 @@ TEST(TestCompNodeCambricon, P2PCopy) {
#endif
#endif // MGB_CAMBRICON
#if MGB_ATLAS
TEST(TestCompNodeAtlas, D2DCopy) {
......@@ -780,7 +774,6 @@ TEST(TestCompNodeAtlas, D2DCopy) {
}
#endif
namespace {
class CompNodeDepedentObjectInst final : public CompNodeDepedentObject {
int *m_dst, *m_timer;
......
......@@ -634,7 +634,6 @@ void test_gather_other(CompNode cn0, CompNode cn1) {
opr::Sleep::sleep(cn1, 0.7);
func->execute();
}
} // namespace
#if MGB_CUDA
......@@ -668,5 +667,4 @@ TEST(TestCudaMemAlloc, FreeMem) {
}
#endif // MGB_CUDA
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -340,7 +340,6 @@ TEST(TestTensor, ValueDump) {
auto val = debug::dump_tensor(*gen({23, 45}), "test");
debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val);
}
template <class Src, class Dst>
void run_negative_index_test() {
constexpr size_t S0 = 200, S1 = 200;
......
......@@ -1912,7 +1912,6 @@ TEST_PASS(FuseConvBiasNonlinPass, Basic) {
}
}
#if MGB_CUDA
TEST(TestEnableTensorCore, SmallInputShape) {
......@@ -4735,7 +4734,6 @@ TEST(TestGoptInference, PaddingChannelsWithWarpPerspective) {
MGB_ASSERT_TENSOR_EQ(t1, t2);
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -67,7 +67,6 @@
#define MGB_CUDA 1
#endif
// whether to include file/line location for assert message
#ifndef MGB_ASSERT_LOC
#define MGB_ASSERT_LOC 1
......@@ -162,7 +161,6 @@
#define MGB_JIT_HALIDE 0
#endif
#ifndef MEGDNN_WITH_CAMBRICON
#define MEGDNN_WITH_CAMBRICON 0
#endif
......@@ -182,7 +180,6 @@
#define MGB_ENABLE_FASTRUN 1
#endif
/* ================= following are more fine-grained controls ================= */
// whether to enable json dumper
......
......@@ -162,7 +162,6 @@ namespace opr {
using ReduceV2 = opr::Reduce;
MGB_SEREG_OPR(ReduceV2, 0);
} // namespace opr
using TypeCvtV2 = opr::TypeCvt;
MGB_SEREG_OPR(TypeCvtV2, 1);
......
......@@ -97,7 +97,6 @@ MGB_SEREG_OPR(SVD, 1);
} // namespace opr
} // namespace mgb
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -613,7 +613,6 @@ MGB_SEREG_OPR(LSQ, 4);
MGB_SEREG_OPR(LSQBackward, 5);
} // namespace opr
} // namespace mgb
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -196,7 +196,6 @@ using DctChannelSelectV1 = opr::DctChannelSelect;
MGB_SEREG_OPR(DctChannelSelectV1, 0);
} // namespace opr
} // namespace mgb
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -57,7 +57,6 @@ namespace serialization {
} // namespace serialization
namespace opr {
MGB_SEREG_OPR(Argmax, 1);
......
......@@ -14,7 +14,6 @@
namespace mgb {
namespace opr {
using UniformRNGV1 = opr::UniformRNG;
......
......@@ -120,7 +120,6 @@ namespace serialization {
#endif
} // namespace serialization
namespace opr {
MGB_SEREG_OPR(Broadcast, 2);
MGB_SEREG_OPR(Dimshuffle, 1);
......
......@@ -2401,7 +2401,6 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) {
worker0.join();
worker1.join();
}
#endif
} // anonymous namespace
......
......@@ -37,7 +37,6 @@ GraphLoader::shared_tensor_name_map() {
}
return ret;
}
std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file);
std::unique_ptr<GraphDumper> make_fbs_dumper(std::unique_ptr<OutputFile> file);
bool is_fbs_file(InputFile& file);
......
......@@ -502,5 +502,4 @@ TEST(TestExternCOpr, Dedup) {
ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst);
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -15,7 +15,6 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT)
list(APPEND SOURCES ${SOURCES_})
endif()
add_executable(megbrain_test ${SOURCES})
target_link_libraries(megbrain_test gtest gmock)
target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS})
......
......@@ -63,7 +63,6 @@ pdef('PersistentOutputStorage').add_fields(
'false')
)
(pdef('CollectiveComm', 'collective communication between multiple computing '
'nodes on localhost')
.add_enum(Doc('Mode', 'mode of collective communication'),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册