diff --git a/CMakeLists.txt b/CMakeLists.txt index 3af836e381c80e1787e9901e8828879e0eb48480..3429e3ee5e18cab88a5b189e1f18ab4af0796291 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,7 +67,6 @@ option(MGE_WITH_ROCM "Enable ROCM support" OFF) option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF) option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF) - if(MSVC OR WIN32) message(STATUS "windows force cudnn static link") set(MGE_WITH_CUDNN_SHARED OFF) @@ -332,7 +331,6 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}") - if(NOT MGE_WITH_JIT) if(MGE_WITH_HALIDE) message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled") @@ -728,7 +726,6 @@ if (MGE_WITH_ROCM) include(cmake/rocm.cmake) endif () - if(MGE_WITH_ATLAS) add_subdirectory(dnn/atlas-stub) list(APPEND MGE_ATLAS_LIBS atlas-stub) @@ -736,7 +733,6 @@ if(MGE_WITH_ATLAS) set(MGB_ATLAS ${MGE_WITH_ATLAS}) endif() - find_program(CCACHE_BIN ccache) if(CCACHE_BIN) set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN}) @@ -834,12 +830,10 @@ endif() set(MGB_CUDA ${MGE_WITH_CUDA}) set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA}) - #ROCM set(MGB_ROCM ${MGE_WITH_ROCM}) set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM}) - # CAMBRICON set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON}) set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON}) @@ -1029,7 +1023,6 @@ if(MGE_BUILD_SDK) add_subdirectory(sdk/load-and-run) endif() - if(MGE_BUILD_IMPERATIVE_RT) add_subdirectory(imperative) message(STATUS "Enable imperative python wrapper runtime") @@ -1117,4 +1110,3 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ") endif() - diff --git a/cmake/Halide.cmake b/cmake/Halide.cmake index 4b145daf983ad7887245997ef97d8c313576aa4c..2dc8ecab902637edf07717fe837f96ca9ddce2b7 100644 --- a/cmake/Halide.cmake +++ b/cmake/Halide.cmake @@ -1,3 +1,4 @@ + include(ExternalProject) find_package(LLVM 6.0 REQUIRED CONFIG) diff --git a/dnn/CMakeLists.txt b/dnn/CMakeLists.txt index 939006d261887de309514d2a09a5f5fb3d57605c..6270da9862bee1a544a59e4985abb353b6bf9aed 100644 --- a/dnn/CMakeLists.txt +++ b/dnn/CMakeLists.txt @@ -38,7 +38,6 @@ list(APPEND OPR_PARAM_DEFS_OUTS ) list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) - install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) @@ -56,7 +55,6 @@ endforeach() add_dependencies(opr_param_defs _opr_param_defs) install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) - if(MGE_WITH_CUDA) add_library(cutlass INTERFACE) target_include_directories(cutlass diff --git a/dnn/include/megdnn/config/config.h b/dnn/include/megdnn/config/config.h index 619f786a6709ecdbfe6897636d46b81010138404..4d7ec4074d03b0b62d103c8f03f27d1e1bdd7ea0 100644 --- a/dnn/include/megdnn/config/config.h +++ b/dnn/include/megdnn/config/config.h @@ 
-13,7 +13,6 @@ #if !defined(__CUDACC__) && !defined(__HIPCC__) - #endif // !defined(__CUDACC__) // vim: syntax=cpp.doxygen diff --git a/dnn/include/megdnn/handle.h b/dnn/include/megdnn/handle.h index c72bc120dd142fe95e9a5a961d4780fadd693c12..f938d1299609f65fea25190269d8791a5b74466e 100644 --- a/dnn/include/megdnn/handle.h +++ b/dnn/include/megdnn/handle.h @@ -90,7 +90,6 @@ class Handle { std::unique_ptr create_rocm_operator(); #endif - virtual ~Handle(); /*! diff --git a/dnn/src/CMakeLists.txt b/dnn/src/CMakeLists.txt index fb1d59c90f690969f4dc5bd3b24a85507c011c92..ddd7f35498ce5ed1df86c4449594f2b643ba1761 100644 --- a/dnn/src/CMakeLists.txt +++ b/dnn/src/CMakeLists.txt @@ -137,11 +137,9 @@ if(MGE_WITH_CUDA) gen_cutlass_kimpl(conv2d tensorop8832) file(GLOB_RECURSE CUTLASS_SOURCES ${CUTLASS_GEN_DIR}/*.cu) list(APPEND SOURCES ${CUTLASS_SOURCES}) - list(APPEND SOURCES ${CUSOURCES}) endif() - if(MGE_WITH_CAMBRICON) file(GLOB_RECURSE SOURCES_ cambricon/*.cpp) list(APPEND SOURCES ${SOURCES_}) @@ -161,7 +159,6 @@ if(MGE_WITH_ATLAS) list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) endif() - add_definitions(${LIBMEGDNN_DEF}) add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) @@ -186,7 +183,6 @@ if(MGE_WITH_ROCM) ${AMDOCL_LIBRARY_DIR}) endif() - if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") if(MGE_ENABLE_CPUINFO) target_link_libraries(megdnn PRIVATE $) diff --git a/dnn/src/common/flag_warn.cpp b/dnn/src/common/flag_warn.cpp index 01a8745ea30fff98d45501b0eb88d595b805b747..a19d9ff73d72c28cf171e71ff15253212a2c5fff 100644 --- a/dnn/src/common/flag_warn.cpp +++ b/dnn/src/common/flag_warn.cpp @@ -15,5 +15,4 @@ #pragma message "Mangling is disabled." #endif // MEGDNN_ENABLE_MANGLING - // vim: syntax=cpp.doxygen diff --git a/dnn/src/common/handle.cpp b/dnn/src/common/handle.cpp index ea9208b43ca5ce21256d0a54d13d8532ee791e44..75b282d8d1b48274d8f5cdf9ea5231d9a9137c39 100644 --- a/dnn/src/common/handle.cpp +++ b/dnn/src/common/handle.cpp @@ -31,13 +31,10 @@ #include "src/aarch64/handle.h" #endif - - #if MEGDNN_WITH_CUDA #include "src/cuda/handle.h" #endif - #if MEGDNN_WITH_CAMBRICON #include "src/cambricon/handle.h" #endif @@ -128,7 +125,6 @@ std::unique_ptr Handle::make(megcoreComputingHandle_t computing_handle, return nullptr; } - void Handle::set_destructor(const thin_function& d) { megdnn_assert(!m_destructor, "destructor can be set only once"); m_destructor = d; diff --git a/dnn/src/common/megcore/common/computing_context.cpp b/dnn/src/common/megcore/common/computing_context.cpp index df191d75bf92e171795bfb28788776328734493e..35d118fd1d0764ed29fdb08ddf649dfea0691839 100644 --- a/dnn/src/common/megcore/common/computing_context.cpp +++ b/dnn/src/common/megcore/common/computing_context.cpp @@ -17,8 +17,6 @@ #include "src/cuda/megcore/cuda_computing_context.hpp" #endif - - #if MEGDNN_WITH_ROCM #include "src/rocm/megcore/computing_context.hpp" #endif diff --git a/dnn/src/common/warp_common.h b/dnn/src/common/warp_common.h index 6db4642b4e196a45fa14cacfc460dab22fab1825..02e0298ceb2f9177516fad88866bfdc644a7ac28 100644 --- a/dnn/src/common/warp_common.h +++ b/dnn/src/common/warp_common.h @@ -880,7 +880,6 @@ void remap(const Mat& src, Mat& dst, Mat& map1, Mat& map2, for (; x1 <= bcols - 8; x1 += 8) vst1q_u16(A + x1, vandq_u16(vld1q_u16(sA + x1), v_scale)); - #endif for (; x1 < bcols; ++x1) A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2 - 1)); diff --git a/dnn/src/cuda/conv_bias/algo.cpp b/dnn/src/cuda/conv_bias/algo.cpp index 
fd290e08aa61aa28ff0135f73640ea40babeda74..80f2846ecfc755df435e55b5c0739e378acedc3b 100644 --- a/dnn/src/cuda/conv_bias/algo.cpp +++ b/dnn/src/cuda/conv_bias/algo.cpp @@ -287,7 +287,6 @@ void ConvBiasForwardImpl::AlgoPack::fill_dp4a_algos() { int8_nchw4_dotprod.emplace_back(AlgoParam{16, 64, 8, 16, 64, 8, 2}); } - ConvBiasForwardImpl::AlgoBase* ConvBiasForwardImpl::AlgoPack::cudnn_conv_from_enum( cudnnConvolutionFwdAlgo_t algo) { diff --git a/dnn/src/cuda/conv_bias/algo.h b/dnn/src/cuda/conv_bias/algo.h index 7349ae1e44c2d4e0ca4a155563aaa6f4d7e143eb..e892ff8cd32a6a4c21e2c5bc54b2b33f58830c82 100644 --- a/dnn/src/cuda/conv_bias/algo.h +++ b/dnn/src/cuda/conv_bias/algo.h @@ -1037,7 +1037,6 @@ private: WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; }; - class ConvBiasForwardImpl::AlgoPack : NonCopyableObj { private: AlgoBase::Mapper m_all_algos_map; diff --git a/dnn/src/fallback/convolution/img2col_helper.h b/dnn/src/fallback/convolution/img2col_helper.h index 9317c5eb66417727cf31dec03d5f336e4ad1f1ed..00379fa4e13bda59c480d1ad27f72eb51c97c5fb 100644 --- a/dnn/src/fallback/convolution/img2col_helper.h +++ b/dnn/src/fallback/convolution/img2col_helper.h @@ -10,7 +10,6 @@ */ #include "src/common/utils.h" - namespace { template diff --git a/dnn/test/CMakeLists.txt b/dnn/test/CMakeLists.txt index 0cc8fc7c40be3b2c5076625dfa1655f11f3e4ac1..298acef5dfb19a0cf7db5756843641ccdc9f9af1 100644 --- a/dnn/test/CMakeLists.txt +++ b/dnn/test/CMakeLists.txt @@ -34,7 +34,6 @@ if(MGE_WITH_CAMBRICON) list(APPEND SOURCES ${SOURCES_}) endif() - if(MGE_WITH_ATLAS) file(GLOB_RECURSE SOURCES_ atlas/*.cpp) list(APPEND SOURCES ${SOURCES_}) @@ -45,8 +44,6 @@ if (MGE_WITH_ROCM) list (APPEND SOURCES ${SOURCES_}) endif() - - add_executable(megdnn_test ${SOURCES}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") target_link_libraries(megdnn_test gtest) @@ -60,7 +57,6 @@ if(MGE_WITH_ATLAS) target_link_libraries(megdnn_test atlas-stub) endif() - target_include_directories(megdnn_test PRIVATE ${PROJECT_SOURCE_DIR}/third_party/midout/src diff --git a/dnn/test/common/conv_bias.cpp b/dnn/test/common/conv_bias.cpp index 9ffe92b3ad1ad9d748134c2d2115a47b94674ce5..fe2022358ee0bb3e7376e44e2c5d7269b6447880 100644 --- a/dnn/test/common/conv_bias.cpp +++ b/dnn/test/common/conv_bias.cpp @@ -494,7 +494,6 @@ std::vector get_int8_nchw44_args(size_t kernel_size, size_t pack_size, return args; } - std::vector get_int8_nchw4_args_check_bounds(size_t kernel_size) { std::vector args; param::ConvBias cur_param; @@ -530,7 +529,6 @@ std::vector get_int8_nchw4_args_check_bounds(size_t kernel_size) { return args; } - std::vector get_int8_nchw4_args_small_batch(size_t kernel_size) { std::vector args; param::ConvBias cur_param; @@ -974,7 +972,6 @@ void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel, } #endif // MEGDNN_WITH_BENCHMARK - std::vector get_conv_bias_args( std::vector kernel, size_t stride, bool no_pad, bool no_bias, bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) { @@ -1188,7 +1185,6 @@ void check_conv_bias_preprocess(std::vector args, } } - void checker_conv_bias_common(std::vector args, Handle* handle, RNG* rng, float epsilon, DType type0, DType type1, DType type2, DType type3, const char* algo_name) { diff --git a/dnn/test/common/conv_bias.h b/dnn/test/common/conv_bias.h index 573efe64c2a0085924863951f063e0aef98a64d5..fbc9edf4191d4c7934b7f4626d45389e10f3a9ee 100644 --- a/dnn/test/common/conv_bias.h +++ b/dnn/test/common/conv_bias.h @@ -93,7 +93,6 @@ void 
check_conv_bias(std::vector args, void checker_conv_bias_int8x8x16( std::vector args, megdnn::Handle* handle, const char* algo_name); - void checker_conv_bias_common(std::vector args, Handle* handle, RNG* rng, float epsilon, DType type0, DType type1, DType type2, diff --git a/dnn/test/common/small_vector.cpp b/dnn/test/common/small_vector.cpp index 667d260ea9439cf5a6da9cfff245f151830b307a..3a88aa186b68fa4590e5ee594cea60737e5f49f2 100644 --- a/dnn/test/common/small_vector.cpp +++ b/dnn/test/common/small_vector.cpp @@ -1145,7 +1145,6 @@ TEST(SmallVectorTest, SwapMoveOnly) { } } } - } // anonymous namespace // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/dnn/test/common/test_basic_types.cpp b/dnn/test/common/test_basic_types.cpp index c377fde506ff9fd9f2e2e7fff4da0c67d8ed516d..e88768b0cd191fa751a838eb4321481ab5ce569c 100644 --- a/dnn/test/common/test_basic_types.cpp +++ b/dnn/test/common/test_basic_types.cpp @@ -40,7 +40,6 @@ TensorLayout make_layout(std::initializer_list shape, } } // anonymous namespace - #if MEGDNN_64_BIT TEST(BASIC_TYPES, TOTAL_NR_ELEMS) { TensorShape shp{1u<<31, 1u<<31}; @@ -340,5 +339,4 @@ TEST(BASIC_TYPES, TENSOR_LAYOUT_FMT_LOW_BITS_VALID) { LowbitsAlignedToBytesTensorFormat::make(4_z)), MegDNNError); } - // vim: syntax=cpp.doxygen diff --git a/dnn/test/cuda/conv_bias_int8.cpp b/dnn/test/cuda/conv_bias_int8.cpp index b4f3ebaede7c895a2c24399377532057805bd4ef..3c593dcc6fce668972e57a02862fca2f1adcf2ca 100644 --- a/dnn/test/cuda/conv_bias_int8.cpp +++ b/dnn/test/cuda/conv_bias_int8.cpp @@ -697,7 +697,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) { conv_bias::get_int8_chwn4_args_small_batch(1)); } - TEST_F(CUDA, FALLBACK_CONV_QS8) { require_compute_capability_eq(7, 5); Checker checker(handle_cuda()); @@ -1100,7 +1099,6 @@ TEST_F(CUDA, BENCHMARK_CONV_BIAS_INT8_NCHW4_NCHW) { run({{16, 16, 46, 80, 4}, {32, 16, 3, 3, 4}, {1, 32, 1, 1}}); } - #if CUDA_VERSION >= 10020 TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW32) { require_compute_capability(7, 5); diff --git a/dnn/test/cuda/elemwise_multi_type.cpp b/dnn/test/cuda/elemwise_multi_type.cpp index b57037b56fff29719715d7d65cb829c3c063eee5..ca771a5acb7660b387f90510d68fdfd0f57978e0 100644 --- a/dnn/test/cuda/elemwise_multi_type.cpp +++ b/dnn/test/cuda/elemwise_multi_type.cpp @@ -32,7 +32,6 @@ TYPED_TEST(CUDA_ELEMWISE_MULTI_TYPE, run) { elemwise_multi_type::run_test(this->handle_cuda()); } - using Mode = ElemwiseMultiType::Param::Mode; static void run_test(int arity, Checker& checker, Mode mode) { for (auto type : std::vector>{ diff --git a/dnn/test/cuda/sleep.cpp b/dnn/test/cuda/sleep.cpp index c53073730823bfa8f9e3fa79064165be32053319..c32cb5964f6cfd0e645a02daaa0f4603e5f2c5c0 100644 --- a/dnn/test/cuda/sleep.cpp +++ b/dnn/test/cuda/sleep.cpp @@ -22,7 +22,6 @@ using namespace megdnn; using namespace test; - TEST_F(CUDA, SLEEP) { auto opr = this->handle_cuda()->create_operator(); @@ -53,6 +52,5 @@ TEST_F(CUDA, SLEEP) { } - // vim: syntax=cpp.doxygen diff --git a/dnn/test/fallback/conv_bias.cpp b/dnn/test/fallback/conv_bias.cpp index ee022652871b5e4d1b96b26c82095e81374f2c36..78a19cb8a621c8ce19c7e60fac5badc6c961e454 100644 --- a/dnn/test/fallback/conv_bias.cpp +++ b/dnn/test/fallback/conv_bias.cpp @@ -75,7 +75,6 @@ TEST_F(FALLBACK, CONV_BIAS_FORWARD) { .execs({src_shape, filter_shape, bias_shape, {}, {}}) .execs({src_shape, filter_shape, bias_shape_channel, {}, {}}); } - } std::vector get_conv_bias_args( @@ -236,7 +235,6 @@ TEST_F(FALLBACK_MULTI_THREADS, 
CONV_BIAS_FORWARD_QUANTIZED) { "FALLBACK_NAIVE"); } - #if MEGDNN_WITH_BENCHMARK TEST_F(FALLBACK, BENCHMARK_CONVBIAS) { constexpr size_t RUNS = 10; diff --git a/imperative/python/megengine/functional/quantized.py b/imperative/python/megengine/functional/quantized.py index 17a45d04de214f2b092a44f4f259dac25ed28ceb..16975c7a63d867ff148e4fad1b15413286e9ce5c 100644 --- a/imperative/python/megengine/functional/quantized.py +++ b/imperative/python/megengine/functional/quantized.py @@ -139,3 +139,52 @@ def batch_conv_bias_activation( ) (outputs,) = apply(op, inp, weight, bias) return outputs + + +def conv_transpose2d( + inp: Tensor, + weight: Tensor, + bias: Tensor = None, + dtype=None, + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + conv_mode="cross_correlation", + compute_mode="default", +) -> Tensor: + + assert ( + conv_mode.lower() == "cross_correlation" + or conv_mode.name == "CROSS_CORRELATION" + ) + assert compute_mode.lower() == "default" or compute_mode.name == "DEFAULT" + + if groups != 1: + raise NotImplementedError( + "group quantized transposed conv2d is not supported yet." + ) + if bias is not None: + raise NotImplementedError( + "bias of quantized transposed conv2d is not supported yet." + ) + + pad_h, pad_w = _pair(padding) + stride_h, stride_w = _pair_nonzero(stride) + dilate_h, dilate_w = _pair_nonzero(dilation) + + # should be replaced by Op with bias such as ConvolutionBackwardDataBias + op = builtin.ConvolutionBackwardData( + stride_h=stride_h, + stride_w=stride_w, + pad_h=pad_h, + pad_w=pad_w, + dilate_h=dilate_h, + dilate_w=dilate_w, + strategy=get_execution_strategy(), + dtype=dtype, + compute_mode=compute_mode, + mode=conv_mode, + ) + (output,) = apply(op, weight, inp) + return output diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py index 861c9269879773658e8731d02fcd14ce2cc1f80f..183356db392621910c584d84c2119d88a687f565 100644 --- a/imperative/python/megengine/module/conv.py +++ b/imperative/python/megengine/module/conv.py @@ -651,11 +651,11 @@ class ConvTranspose2d(_ConvNd): # Assume format is NCHW return (1, self.out_channels, 1, 1) - def forward(self, inp): + def calc_conv_transpose2d(self, inp, weight, bias): return conv_transpose2d( inp, - self.weight, - self.bias, + weight, + bias, self.stride, self.padding, self.dilation, @@ -664,6 +664,9 @@ class ConvTranspose2d(_ConvNd): self.compute_mode, ) + def forward(self, inp): + return self.calc_conv_transpose2d(inp, self.weight, self.bias) + class LocalConv2d(Conv2d): r""" diff --git a/imperative/python/megengine/module/qat/__init__.py b/imperative/python/megengine/module/qat/__init__.py index 9a9295f9fc7ffaf84f784cd4cafa21cfd760f177..86ff723524b05381fd3e53eefe564b7bcc650e7d 100644 --- a/imperative/python/megengine/module/qat/__init__.py +++ b/imperative/python/megengine/module/qat/__init__.py @@ -7,7 +7,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
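The forward/calc_conv_transpose2d split in module/conv.py above exists so that the QAT and quantized subclasses added below can reuse a single call path while substituting fake-quantized or int8 weight and bias. A minimal sketch of the conversion chain this enables, assuming the standard megengine.quantization entry points (quantize_qat as used by the new test, and quantize to apply the new from_qat_module hooks); it is kept bias-free because the new functional conv_transpose2d still raises NotImplementedError for bias and for groups != 1:

# Sketch only: float -> QAT -> quantized ConvTranspose2d.
# Shapes and the quantize()/quantize_qat() calls here are illustrative.
import megengine.module as M
from megengine.quantization import quantize, quantize_qat

net = M.ConvTranspose2d(32, 64, 3, bias=False)  # float module
qat_net = quantize_qat(net, inplace=False)      # -> qat.ConvTranspose2d
# ... calibrate / QAT-train qat_net so activation observers hold valid scales ...
qat_net.eval()
q_net = quantize(qat_net, inplace=False)        # -> quantized.ConvTranspose2d
# q_net now carries qint8 weight and, at forward time, calls the new
# functional conv_transpose2d on a qint8 input (e.g. produced by QuantStub).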
from .batch_matmul_activation import BatchMatMulActivation from .concat import Concat -from .conv import Conv2d, ConvRelu2d +from .conv import Conv2d, ConvRelu2d, ConvTranspose2d from .conv_bn import ConvBn2d, ConvBnRelu2d from .elemwise import Elemwise from .linear import Linear diff --git a/imperative/python/megengine/module/qat/conv.py b/imperative/python/megengine/module/qat/conv.py index c3608d591585675d338f01eb5b81419ba4ce6c8e..c8465f8c782a42133b5a07d21d7b70fe96e344db 100644 --- a/imperative/python/megengine/module/qat/conv.py +++ b/imperative/python/megengine/module/qat/conv.py @@ -57,3 +57,42 @@ class ConvRelu2d(Conv2d): def forward(self, inp): return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp))) + + +class ConvTranspose2d(Float.ConvTranspose2d, QATModule): + r""" + A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support. + Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`. + """ + + def calc_conv_transpose2d_qat(self, inp): + w_qat = self.apply_quant_weight(self.weight) + b_qat = self.apply_quant_bias(self.bias, inp, w_qat) + conv = self.calc_conv_transpose2d(inp, w_qat, b_qat) + return conv + + @classmethod + def from_float_module(cls, float_module: Float.ConvTranspose2d): + r""" + Return a :class:`~.QATModule` instance converted from + a float :class:`~.Module` instance. + """ + qat_module = cls( + float_module.in_channels, + float_module.out_channels, + float_module.kernel_size, + float_module.stride, + float_module.padding, + float_module.dilation, + float_module.groups, + float_module.bias is not None, + float_module.conv_mode, + float_module.compute_mode, + name=float_module.name, + ) + qat_module.weight = float_module.weight + qat_module.bias = float_module.bias + return qat_module + + def forward(self, inp): + return self.apply_quant_activation(self.calc_conv_transpose2d_qat(inp)) diff --git a/imperative/python/megengine/module/quantized/__init__.py b/imperative/python/megengine/module/quantized/__init__.py index 5c21359e0dbb7117a08c5f7fd4207ef079a7c53c..ed9866f1e1719b30c5654fa5f04e1bd1a56841be 100644 --- a/imperative/python/megengine/module/quantized/__init__.py +++ b/imperative/python/megengine/module/quantized/__init__.py @@ -7,7 +7,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. from .batch_matmul_activation import BatchMatMulActivation from .concat import Concat -from .conv import Conv2d, ConvRelu2d +from .conv import Conv2d, ConvRelu2d, ConvTranspose2d from .conv_bn import ConvBn2d, ConvBnRelu2d from .elemwise import Elemwise from .linear import Linear diff --git a/imperative/python/megengine/module/quantized/conv.py b/imperative/python/megengine/module/quantized/conv.py index 0230dda5d0bda6979ca0c84b65a2e62d3cd8f498..4d7a5b5f6f5132450d4bd3df23d86685d8e07217 100644 --- a/imperative/python/megengine/module/quantized/conv.py +++ b/imperative/python/megengine/module/quantized/conv.py @@ -12,6 +12,7 @@ import numpy as np from ... import module as Float from ...core.tensor import dtype from ...functional.nn import conv_bias_activation +from ...functional.quantized import conv_transpose2d from ...tensor import Parameter from ..qat import conv as QAT from .module import QuantizedModule @@ -108,3 +109,98 @@ class ConvRelu2d(Conv2d): def forward(self, inp): return self.calc_conv_quantized(inp, nonlinear_mode="relu") + + +class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule): + r"""Quantized version of :class:`~.qat.ConvTranspose2d`. 
+ + Applies a 2D transposed convolution over a quantized input tensor, used + for inference only. + + The parameters are the same as :class:`~.module.ConvTranspose2d` except ``dtype``. + + :param dtype: data type of the output, should be qint8. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + groups: int = 1, + bias: bool = True, + conv_mode: str = "cross_correlation", + compute_mode: str = "default", + dtype=None, + **kwargs + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + conv_mode=conv_mode, + compute_mode=compute_mode, + ) + self.output_dtype = dtype + + @classmethod + def from_qat_module(cls, qat_module: QAT.ConvTranspose2d): + r""" + Return a :class:`~.QuantizedModule` instance converted from a + :class:`~.QATModule` instance. + """ + output_dtype = qat_module.get_activation_dtype() + qconv = cls( + qat_module.in_channels, + qat_module.out_channels, + qat_module.kernel_size, + qat_module.stride, + qat_module.padding, + qat_module.dilation, + qat_module.groups, + qat_module.bias is not None, + qat_module.conv_mode, + qat_module.compute_mode, + dtype=output_dtype, + name=qat_module.name, + ) + weight = qat_module.weight.astype(qat_module.get_weight_dtype()) + qconv.weight = Parameter(weight.numpy(), name=qat_module.weight.name) + qconv.bias = ( + Parameter(qat_module.bias.numpy(), name=qat_module.bias.name) + if qat_module.bias is not None + else None + ) + return qconv + + def calc_conv_transpose2d_quantized(self, inp): + if self.bias is not None: + inp_scale = dtype.get_scale(inp.dtype) + w_scale = dtype.get_scale(self.weight.dtype) + bias_scale = inp_scale * w_scale + + return conv_transpose2d( + inp=inp, + weight=self.weight, + bias=self.bias.astype(dtype.qint32(bias_scale)) + if self.bias is not None + else None, + dtype=self.output_dtype, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + conv_mode=self.conv_mode, + compute_mode=self.compute_mode, + ) + + def forward(self, inp): + return self.calc_conv_transpose2d_quantized(inp) diff --git a/imperative/python/megengine/quantization/internal_fake_quant.py b/imperative/python/megengine/quantization/internal_fake_quant.py index 7972c62254c690a3480db9c8534d7f375d50f399..afec8fe925e6cf257a5255035b26354b249cc0ff 100644 --- a/imperative/python/megengine/quantization/internal_fake_quant.py +++ b/imperative/python/megengine/quantization/internal_fake_quant.py @@ -13,5 +13,3 @@ from .fake_quant import _FakeQuantize from .observer import MinMaxObserver from .qconfig import QConfig from .utils import QParams - - diff --git a/imperative/python/megengine/utils/persistent_cache.py b/imperative/python/megengine/utils/persistent_cache.py index eda132c2a568c3c7303391ceb10093946a907063..b0bd351d426fac7a01cdf54861787b2b658ecfd7 100644 --- a/imperative/python/megengine/utils/persistent_cache.py +++ b/imperative/python/megengine/utils/persistent_cache.py @@ -69,7 +69,6 @@ class PersistentCacheOnServer(_PersistentCache): def make_user_prefix(cls): return "mgbcache:{}".format(getpass.getuser()) - def _make_key(self, category, key): prefix_with_version = "{}:MGB{}".format(self._prefix, __version__) return b"@".join( @@ -86,5 +85,3 @@ class
PersistentCacheOnServer(_PersistentCache): key = self._make_key(category, key) self._prev_get_refkeep = conn.get(key) return self._prev_get_refkeep - - diff --git a/imperative/python/setup.py b/imperative/python/setup.py index 21c5365defce0d80059976c45e82a92b2437061a..7441c5abe823d3bf9a1848cd0949427f2bfdaf08 100644 --- a/imperative/python/setup.py +++ b/imperative/python/setup.py @@ -38,7 +38,6 @@ class build_ext(_build_ext): modpath = str(pathlib.Path(*modpath).resolve()) copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) - package_name = 'MegEngine' v = {} @@ -79,7 +78,6 @@ megengine_data += [ for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') ] - with open('requires.txt') as f: requires = f.read().splitlines() with open('requires-style.txt') as f: @@ -108,8 +106,6 @@ setup_kwargs = dict( cmdclass={'build_ext': build_ext}, scripts = ['./megengine/tools/mge'], ) - - setup_kwargs.update(dict( classifiers=[ 'Development Status :: 3 - Alpha', diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py index 612f6612fca4a76912643630ca9cf8e9179e261f..36f18697705888104b486ec1e13cc829b92131c4 100644 --- a/imperative/python/test/unit/functional/test_functional.py +++ b/imperative/python/test/unit/functional/test_functional.py @@ -876,8 +876,6 @@ def test_nms_is_same(): assert op3 != op4 - - def test_argmxx_on_inf(): def run_argmax(): x = F.zeros((100, 100)) diff --git a/imperative/python/test/unit/module/test_qat.py b/imperative/python/test/unit/module/test_qat.py index 51b5206e5d12544ea1380418daae6af49cbad145..2cb3f5a8788190ecdd1c8d40e6df08e1979ce1e6 100644 --- a/imperative/python/test/unit/module/test_qat.py +++ b/imperative/python/test/unit/module/test_qat.py @@ -13,6 +13,7 @@ from megengine.module import ( Conv2d, ConvBn2d, ConvRelu2d, + ConvTranspose2d, DequantStub, Module, QuantStub, @@ -202,3 +203,40 @@ def test_quantize_batchmatmul_activation(): infer_cg = cgtools.GraphInference(file)[0] dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0] np.testing.assert_allclose(quantize_outputs.numpy(), dumped_outputs, atol=1e-6) + + +def test_qat_conv_transpose2d(): + in_channels = 32 + out_channels = 64 + kernel_size = 3 + + class TestNet(Module): + def __init__(self, bias): + super().__init__() + self.quant = QuantStub() + self.dequant = DequantStub() + self.conv = ConvTranspose2d( + in_channels, out_channels, kernel_size, bias=bias + ) + + def forward(self, inp): + out = self.quant(inp) + out = self.conv(out) + out = self.dequant(out) + return out + + inputs = tensor(np.random.randn(4, in_channels, 32, 32).astype(np.float32)) + for bias in [True, False]: + net = TestNet(bias) + net.train() + qat_net = quantize_qat(net, inplace=False) + disable_fake_quant(qat_net) + normal_outputs = net(inputs) + qat_outputs = qat_net(inputs) + np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy()) + + net.eval() + normal_outputs = net(inputs) + qat_net.eval() + qat_outputs = qat_net(inputs) + np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy()) diff --git a/imperative/python/test/unit/quantization/test_fake_quant.py b/imperative/python/test/unit/quantization/test_fake_quant.py index 612e03ab7e6cabec4a8d8964ecc5404765723f9e..8ee961ed7de74fcf5f5b4bf3474ffce9de57b09e 100644 --- a/imperative/python/test/unit/quantization/test_fake_quant.py +++ b/imperative/python/test/unit/quantization/test_fake_quant.py @@ -92,8 +92,6 @@ def test_tqt(): 
np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5) - - def _save_to(self, name="grad"): def callback(grad): setattr(self, name, grad) diff --git a/imperative/python/test/unit/quantization/test_op.py b/imperative/python/test/unit/quantization/test_op.py index beb151d9ef948fd58fe073f28b52bb78abbcbcfa..9b07ac02ae098b89ac785779d35173c4edd02a1a 100644 --- a/imperative/python/test/unit/quantization/test_op.py +++ b/imperative/python/test/unit/quantization/test_op.py @@ -14,6 +14,7 @@ import megengine.functional as F from megengine.core.tensor import dtype from megengine.device import get_device_count from megengine.functional.elemwise import _elemwise_multi_type, _elwise +from megengine.module.quantized.conv import ConvTranspose2d from megengine.quantization import QuantMode, create_qparams @@ -168,3 +169,94 @@ def test_conv_bias(): run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu") run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu") + + +def test_conv_transpose2d(): + rng = np.random.RandomState(seed=2021) + + def test_func( + N, + IC, + IH, + IW, + OC, + KH, + KW, + SH, + SW, + PH, + PW, + DH, + DW, + groups=1, + has_bias=True, + conv_mode: str = "cross_correlation", + compute_mode: str = "default", + ): + inp_scale = np.float32(rng.uniform(low=0.04, high=0.06)) + weight_scale = np.float32(rng.uniform(low=0.04, high=0.06)) + bias_scale = inp_scale * weight_scale + out_scale = np.float32(rng.uniform(low=0.04, high=0.06)) + + inp_dtype = dtype.qint8(inp_scale) + weight_dtype = dtype.qint8(weight_scale) + bias_dtype = dtype.qint32(bias_scale) + out_dtype = dtype.qint8(out_scale) + + inp_fp32 = rng.uniform(low=-1, high=1, size=(N, IC, IH, IW)).astype(np.float32) + weight_fp32 = rng.uniform(low=-1, high=1, size=(IC, OC, KH, KW)).astype( + np.float32 + ) + bias_fp32 = rng.uniform(low=-1, high=1, size=(1, OC, 1, 1)).astype(np.float32) + + inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype) + weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype) + bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype) + + inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype) + weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype) + bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype) + + inp_fp32 = inp_int8.astype("float32") + weight_fp32 = weight_int8.astype("float32") + bias_fp32 = bias_int32.astype("float32") + + expected = F.conv_transpose2d( + inp_fp32, + weight_fp32, + bias_fp32 if has_bias else None, + stride=(SH, SW), + padding=(PH, PW), + dilation=(DH, DW), + groups=groups, + conv_mode=conv_mode, + compute_mode=compute_mode, + ) + expected = dtype.convert_to_qint8(expected.numpy(), out_dtype) + expected = dtype.convert_from_qint8(expected) + + conv_transpose2d = ConvTranspose2d( + in_channels=IC, + out_channels=OC, + kernel_size=(KH, KW), + stride=(SH, SW), + padding=(PH, PW), + dilation=(DH, DW), + groups=groups, + bias=has_bias, + conv_mode=conv_mode, + compute_mode=compute_mode, + dtype=out_dtype, + ) + + conv_transpose2d.weight = mge.Parameter(weight_int8) + if has_bias: + conv_transpose2d.bias = mge.Parameter(bias_int32) + result = conv_transpose2d.forward(inp_int8).numpy() + result = dtype.convert_from_qint8(result) + np.testing.assert_allclose(result, expected, atol=out_scale) + + test_func(1, 4, 1, 1, 4, 1, 1, 1, 1, 0, 0, 1, 1, 1, False) + test_func(2, 4, 3, 1, 8, 1, 1, 1, 1, 0, 0, 1, 1, 1, False) + test_func(4, 4, 16, 16, 8, 3, 3, 1, 1, 1, 1, 1, 1, 1, False) + test_func(32, 64, 36, 28, 16, 3, 2, 1, 3, 1, 0, 1, 1, 1, False) diff --git 
a/imperative/python/test/unit/utils/test_network_node.py b/imperative/python/test/unit/utils/test_network_node.py index 15cb7c1d1a65ebad15301a060280f6ddc162788e..dc2fa6d0dc648b96152247d7f6deb0c83173cb7d 100644 --- a/imperative/python/test/unit/utils/test_network_node.py +++ b/imperative/python/test/unit/utils/test_network_node.py @@ -486,8 +486,6 @@ def test_topk(): check_pygraph_dump(fwd, [x], [top, indices]) - - def test_random(): @trace(symbolic=True, capture_as_const=True) def fwd(): @@ -723,8 +721,6 @@ def test_elemwise_multitype(): check_pygraph_dump(fwd, [x, y], [result]) - - def test_cvtcolor(): inp = np.random.randn(3, 3, 3, 3).astype(np.float32) x = Tensor(inp) diff --git a/imperative/python/version_template.py b/imperative/python/version_template.py index e5d40075afd7bd7a91409dc540f3d28db87c02ba..f45d439922daea36ef7b020dcabc852663c69da4 100644 --- a/imperative/python/version_template.py +++ b/imperative/python/version_template.py @@ -7,4 +7,3 @@ # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. __version__ = "1.6.0.dev" - diff --git a/imperative/src/impl/ops/convolution.cpp b/imperative/src/impl/ops/convolution.cpp index 8a517712b31360aff60e90c97342af1577e41a8e..90c5a5f6027dd408b485e797727a7b7c57aa5d5d 100644 --- a/imperative/src/impl/ops/convolution.cpp +++ b/imperative/src/impl/ops/convolution.cpp @@ -43,6 +43,11 @@ auto apply_on_var_node( const VarNodeArray& inputs) { auto&& conv = static_cast(def); OperatorNodeConfig config{conv.make_name()}; + DType output_dtype = conv.dtype; + if (output_dtype.valid()) { + config.output_dtype(output_dtype); + } + if (inputs.size() == 2) { return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config); } else { diff --git a/scripts/whl/macos/macos_build_whl.sh b/scripts/whl/macos/macos_build_whl.sh index 9533b7c3675df90f6354327c1665d12fc719c56d..63ff3b0b84025080e16cded0bf8fcba360389616 100755 --- a/scripts/whl/macos/macos_build_whl.sh +++ b/scripts/whl/macos/macos_build_whl.sh @@ -192,7 +192,6 @@ function do_build() { #handle dlopen path install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so - #copy megbrain_export lib DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/ rm -rf ${DEPEND_LIB} @@ -209,7 +208,6 @@ function do_build() { echo "comapt whl name: ${compat_whl_name}" cp ${BUILD_DIR}/staging/dist/Meg*.whl ${MACOS_WHL_HOME}/${compat_whl_name} - cd ${SRC_DIR} echo "" echo "##############################################################################################" @@ -220,12 +218,10 @@ function do_build() { done } - function third_party_prepare() { echo "init third_party..." ${SRC_DIR}/third_party/prepare.sh - if [[ -z ${ALREADY_INSTALL_MKL} ]] then echo "init third_party..." diff --git a/scripts/whl/manylinux2014/do_build_common.sh b/scripts/whl/manylinux2014/do_build_common.sh index 5fc2081dc56abd8e364c009241ce98be79cdb05f..0f1fc771e3359f907b92e344586c298989eab789 100755 --- a/scripts/whl/manylinux2014/do_build_common.sh +++ b/scripts/whl/manylinux2014/do_build_common.sh @@ -55,13 +55,11 @@ function patch_elf_depend_lib_mgb_mge() { patchelf --force-rpath --set-rpath '$ORIGIN/.' 
${LIBS_DIR}/libmegengine_export.so handle_strip ${LIBS_DIR}/libmegengine_export.so - # as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel # TODO: PR for auditwheel to support args for dlopen libs handle_copy_cuda_libs ${LIBS_DIR} } - SRC_DIR=$(readlink -f "`dirname $0`/../../../") source ${SRC_DIR}/scripts/whl/utils/utils.sh @@ -142,7 +140,6 @@ do mkdir -p staging cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ - cd ${BUILD_DIR}/staging/megengine/core mkdir -p lib/ucx patch_elf_depend_lib_mgb_mge @@ -158,7 +155,6 @@ do echo "comapt whl name: ${compat_whl_name}" mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name} - cd /home/output chown -R ${UID}.${UID} . # compat for root-less docker env to remove output at host side diff --git a/scripts/whl/windows/windows_build_whl.sh b/scripts/whl/windows/windows_build_whl.sh index 3d0045f8b69c8ac17482e33fb3206f4a5f6e45b0..c917c4895c2432f2bb8f7ab3c6686f5682ed42a7 100755 --- a/scripts/whl/windows/windows_build_whl.sh +++ b/scripts/whl/windows/windows_build_whl.sh @@ -70,7 +70,6 @@ then BUILD_WHL_CPU_ONLY="OFF" fi - # config NVIDIA libs TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll" CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll" @@ -102,14 +101,11 @@ function copy_more_dll() { # empty.file to triger setup.py to create a null empty echo "empty" > ${CP_WHL_DST_IMP}/empty.file - if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then echo "copy nvidia lib to whl use...." depend_real_copy ${CP_WHL_DST_IMP} - fi } - BUILD_DIR=${SRC_DIR}/build_dir/host/build/ # here we just treat cu file should not in the increment build file list @@ -194,14 +190,12 @@ function do_build() { llvm-strip -s ${rt_file} mv ${rt_file} _imperative_rt.pyd - copy_more_dll cd ${BUILD_DIR}/staging echo "call setup.py now" ${PYTHON_DIR}/python3 setup.py bdist_wheel cp ${BUILD_DIR}/staging/dist/Meg*.whl ${WINDOWS_WHL_HOME}/ - echo "" echo "##############################################################################################" echo "windows whl package location: ${WINDOWS_WHL_HOME}" @@ -215,7 +209,6 @@ function third_party_prepare() { echo "init third_party..." ${SRC_DIR}/third_party/prepare.sh - if [[ -z ${ALREADY_INSTALL_MKL} ]] then echo "init third_party..." 
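The new test_conv_transpose2d in test_op.py above validates the int8 module against a float reference that is itself quantized to out_dtype and dequantized again, with atol=out_scale (one quantization step), and it quantizes bias to qint32 with scale inp_scale * w_scale. The round-trip convention behind that tolerance, as a standalone sketch reusing the same megengine.core.tensor.dtype helpers with illustrative values:

# Quantize/dequantize round-trip: per-element error is at most half a step
# (from rounding, ignoring clipping), which is why the test uses atol=out_scale.
import numpy as np
from megengine.core.tensor import dtype

out_scale = np.float32(0.05)
out_dtype = dtype.qint8(out_scale)

x = np.array([0.12, -0.3, 1.0], dtype=np.float32)
q = dtype.convert_to_qint8(x, out_dtype)   # round(x / out_scale) as int8
x_hat = dtype.convert_from_qint8(q)        # q * out_scale
assert np.allclose(x, x_hat, atol=out_scale)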
diff --git a/src/core/impl/comp_node_env.cpp b/src/core/impl/comp_node_env.cpp index 1c6546415c1548b392556a5d775f301ef3f6bd9e..de6a0de69f38905c1e0e329becc50d98794a7466 100644 --- a/src/core/impl/comp_node_env.cpp +++ b/src/core/impl/comp_node_env.cpp @@ -35,8 +35,6 @@ #include "megcore_atlas.h" #endif - - using namespace mgb; /* =================== MegDNNHandle =================== */ @@ -102,7 +100,6 @@ MegDNNHandle::MegDNNHandle(const CompNodeEnv& env) { } #endif - if (env.property().type == CompNode::DeviceType::CPU) { megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU); megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl, @@ -234,7 +231,6 @@ void CompNodeEnv::init_cuda_async(int dev, CompNode comp_node, } #endif - #if MGB_ATLAS void mgb::_on_atlas_error(const char* expr, int err, const char* file, @@ -258,8 +254,6 @@ void CompNodeEnv::init_atlas(CompNode comp_node, const AtlasEnv& env) { } #endif - - #if MGB_ROCM void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file, @@ -381,7 +375,6 @@ void CompNodeEnv::init_cpu(const CpuEnv& env, CompNode comp_node) { MegDNNHandle::get(*this).handle()->alignment_requirement(); } - #if MGB_CAMBRICON void CompNodeEnv::init_cnrt(int dev, CompNode comp_node, const ContinuationCtx& cont) { @@ -446,7 +439,6 @@ void CompNodeEnv::fini() { MGB_ATLAS_CHECK(aclrtDestroyStream(m_atlas_env.stream)); } #endif - } #if MGB_ENABLE_COMP_NODE_ASYNC_INIT diff --git a/src/core/impl/exception.cpp b/src/core/impl/exception.cpp index 7d990a180ff0a54a8a13d4061056812fd6c7e2c8..7cb61de0423ad74a784c1bcd39516faaf5258306 100644 --- a/src/core/impl/exception.cpp +++ b/src/core/impl/exception.cpp @@ -73,14 +73,11 @@ std::string CudaError::get_cuda_extra_info() { #endif } - AtlasError::AtlasError(const std::string &msg): SystemError(msg) { } - - ROCmError::ROCmError(const std::string &msg): SystemError(msg) { diff --git a/src/core/impl/graph/cg_impl.cpp b/src/core/impl/graph/cg_impl.cpp index 43b9a5a4e356860735dcfdad29d8e8037f301e23..515637312b8729026e8a44754b77f58b30257a87 100644 --- a/src/core/impl/graph/cg_impl.cpp +++ b/src/core/impl/graph/cg_impl.cpp @@ -23,7 +23,6 @@ #include "megbrain/graph/helper.h" #include "megbrain/opr/utility.h" - #if MGB_ENABLE_TENSOR_RT #include "megbrain/tensorrt/opr_replace.h" #endif @@ -554,7 +553,6 @@ ComputingGraphImpl::CompileState ComputingGraphImpl::compile_prepare( } #endif - #if MGB_JIT if (std::abs(options().graph_opt_level) == 0 && (options().graph_opt.jit || options().graph_opt.jit_config.enabled())) { diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h index d657a0b9c1ecdb641d841797fecdff5074a001aa..953f23c1b72fbccfb3e52d5a193f2d86cea84ec4 100644 --- a/src/core/impl/graph/var_node_mem_mgr.h +++ b/src/core/impl/graph/var_node_mem_mgr.h @@ -445,7 +445,6 @@ class VarNodeMemManager { SyncableCounter m_cpu_async_release_barrier; - #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM //! release dynamic var on after compnode event finishes class AsyncVarReleaser; diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h index db8076b26f4650f0a272a3265609178268458870..87c362ed06fb4f9ec088f00a1acf88e904ae0f3c 100644 --- a/src/core/include/megbrain/comp_node.h +++ b/src/core/include/megbrain/comp_node.h @@ -508,7 +508,6 @@ class CompNode { */ static bool enable_affinity_for_cpu(bool flag); - protected: //! 
ImplBase with env(); defined in CompNodeEnv class Impl; diff --git a/src/core/include/megbrain/comp_node_env.h b/src/core/include/megbrain/comp_node_env.h index 3b9f1af8b786e03a4ca73fc3501e543e2c311d4d..af8575dcdccfa225c1af861fe6bf33cb4d1b2b69 100644 --- a/src/core/include/megbrain/comp_node_env.h +++ b/src/core/include/megbrain/comp_node_env.h @@ -19,8 +19,6 @@ #include "megdnn/handle.h" - - #if MGB_CUDA #include #include @@ -90,8 +88,6 @@ #endif // MGB_ATLAS - - #if MGB_ROCM #include "hcc_detail/hcc_defs_prologue.h" #include "megcore_rocm.h" @@ -196,7 +192,6 @@ namespace mgb { const char* file, const char* func, int line); #endif - #if MGB_CUDA [[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err, const char* file, const char* func, int line); @@ -205,7 +200,6 @@ namespace mgb { int line); #endif - #if MGB_ROCM [[noreturn]] void _on_hip_error(const char* expr, hipError_t err, const char* file, const char* func, int line); @@ -232,7 +226,6 @@ public: mgb_assert(0, "The CompNode set_affinity is not implement"); } }; - using AtlasDispatcher = CPUDispatcher; /*! @@ -328,7 +321,6 @@ public: } #endif - } /*! @@ -370,7 +362,6 @@ public: const ContinuationCtx& cont); #endif - #if MGB_ATLAS struct AtlasEnv { int device = -1; @@ -431,8 +422,6 @@ public: void init_atlas(CompNode comp_node, const AtlasEnv& env); #endif - - #if MGB_ROCM struct ROCmEnv { int device = -1; @@ -547,7 +536,6 @@ private: CompNode m_comp_node; Property m_property; MemEventHandler m_mem_event_handler; - #if MGB_CUDA CudaEnv m_cuda_env; #endif diff --git a/src/core/include/megbrain/exception.h b/src/core/include/megbrain/exception.h index a58bd8433ce92a45f99c874a1b4ed2139a7b5758..701f056d9e2166986c62cee4c34b7173e4a05736 100644 --- a/src/core/include/megbrain/exception.h +++ b/src/core/include/megbrain/exception.h @@ -71,7 +71,6 @@ }) \ do { \ } while (0) - namespace mgb { //! the most general MegBrain exception type; also base class for all megbrain @@ -149,7 +148,6 @@ public: AtlasError(const std::string& msg); }; - class ROCmError final : public SystemError { public: /*! 
@@ -224,7 +222,6 @@ public: using MegBrainError::MegBrainError; }; - } // namespace mgb namespace mgb { @@ -233,5 +230,4 @@ bool has_uncaught_exception(); } // namespace mgb - // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td index 14aa42f835814f65a6c3d407efa4c79ace25f4b5..07223e7f5295385e44eb4758ae3dd1c354bf2918 100644 --- a/src/core/include/megbrain/ir/ops.td +++ b/src/core/include/megbrain/ir/ops.td @@ -49,7 +49,11 @@ def SVD: MgbHashableOp<"SVD", [SVDParam]>; def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; -def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>; +def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]> { + let extraArguments = (ins + MgbDTypeAttr:$dtype + ); +} def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>; diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index 05bd4c658b0c86f3f905b17d733915b21968f124..1e45711e2f3babc5c893f5a76cf8e89cdf1610d3 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -40,7 +40,6 @@ TEST(TestCompNode, Parse) { ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); - ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); @@ -62,7 +61,6 @@ TEST(TestCompNode, Parse) { ASSERT_EQ(L::parse("multithread:default:2"), make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2)); - ASSERT_THROW(L::parse("apu"), MegBrainError); ASSERT_THROW(L::parse("fpgbx"), MegBrainError); ASSERT_THROW(L::parse("cab0"), MegBrainError); @@ -165,8 +163,6 @@ TEST(TestCompNode, Load) { auto atlas1 = CompNode::load("atlas1"); ASSERT_NE(atlas0, atlas1); #endif - - } TEST(TestCompNode, FreeAfterFinalize) { @@ -355,7 +351,6 @@ TEST(TestCompNodeAtlas, MemNode) { } #endif - TEST(TestCompNodeCPU, PhysicalDispatch) { constexpr int ID = 0x2a6453e0; using L = CompNode::Locator; @@ -754,7 +749,6 @@ TEST(TestCompNodeCambricon, P2PCopy) { #endif #endif // MGB_CAMBRICON - #if MGB_ATLAS TEST(TestCompNodeAtlas, D2DCopy) { @@ -780,7 +774,6 @@ TEST(TestCompNodeAtlas, D2DCopy) { } #endif - namespace { class CompNodeDepedentObjectInst final : public CompNodeDepedentObject { int *m_dst, *m_timer; diff --git a/src/core/test/mem_alloc.cpp b/src/core/test/mem_alloc.cpp index eea86586184aef5fe18810e4f059e57bea215a25..b6f4944749ee0c2b75783f922dcad46c467bfe92 100644 --- a/src/core/test/mem_alloc.cpp +++ b/src/core/test/mem_alloc.cpp @@ -634,7 +634,6 @@ void test_gather_other(CompNode cn0, CompNode cn1) { opr::Sleep::sleep(cn1, 0.7); func->execute(); } - } // namespace #if MGB_CUDA @@ -668,5 +667,4 @@ TEST(TestCudaMemAlloc, FreeMem) { } #endif // MGB_CUDA - // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/core/test/tensor.cpp b/src/core/test/tensor.cpp index 23736404ab0de36f0582ca2e6caf53912af8b6ae..e2fbae64dace3b09607e8c9da582e44fdea237a2 100644 --- a/src/core/test/tensor.cpp +++ b/src/core/test/tensor.cpp @@ -340,7 +340,6 @@ TEST(TestTensor, ValueDump) { auto val = debug::dump_tensor(*gen({23, 45}), "test"); debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val); } - template void 
run_negative_index_test() { constexpr size_t S0 = 200, S1 = 200; diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp index ad1d17206cbed1dd5eee863e9a2d965fd56ca222..817f6384e6dc69a84218ecf01983be43570dc825 100644 --- a/src/gopt/test/inference.cpp +++ b/src/gopt/test/inference.cpp @@ -1912,7 +1912,6 @@ TEST_PASS(FuseConvBiasNonlinPass, Basic) { } } - #if MGB_CUDA TEST(TestEnableTensorCore, SmallInputShape) { @@ -4735,7 +4734,6 @@ TEST(TestGoptInference, PaddingChannelsWithWarpPerspective) { MGB_ASSERT_TENSOR_EQ(t1, t2); } - #endif // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/megbrain_build_config.h.in b/src/megbrain_build_config.h.in index 4407817b1f851dc7c4f3fe6c7da629381afe6ce1..49de6b1d23965eed94a0066df77780be1a932f92 100644 --- a/src/megbrain_build_config.h.in +++ b/src/megbrain_build_config.h.in @@ -67,7 +67,6 @@ #define MGB_CUDA 1 #endif - // whether to include file/line location for assert message #ifndef MGB_ASSERT_LOC #define MGB_ASSERT_LOC 1 @@ -162,7 +161,6 @@ #define MGB_JIT_HALIDE 0 #endif - #ifndef MEGDNN_WITH_CAMBRICON #define MEGDNN_WITH_CAMBRICON 0 #endif @@ -182,7 +180,6 @@ #define MGB_ENABLE_FASTRUN 1 #endif - /* ================= following are more finegrind controls ================= */ // whether to enable json dumper diff --git a/src/opr/impl/basic_arith.sereg.h b/src/opr/impl/basic_arith.sereg.h index d8180c3aebfcd3672bf72389beac63e692625cfb..97e87e0f0bc8e05ed2c2f2f737280f55b3d62431 100644 --- a/src/opr/impl/basic_arith.sereg.h +++ b/src/opr/impl/basic_arith.sereg.h @@ -162,7 +162,6 @@ namespace opr { using ReduceV2 = opr::Reduce; MGB_SEREG_OPR(ReduceV2, 0); } // namespace opr - using TypeCvtV2 = opr::TypeCvt; MGB_SEREG_OPR(TypeCvtV2, 1); diff --git a/src/opr/impl/blas.sereg.h b/src/opr/impl/blas.sereg.h index e959d88894b3f414750bb818ecfdeba2d0092bf1..59dd928dc7ac62e66250dbd4bc3f92833dcceffd 100644 --- a/src/opr/impl/blas.sereg.h +++ b/src/opr/impl/blas.sereg.h @@ -97,7 +97,6 @@ MGB_SEREG_OPR(SVD, 1); } // namespace opr - } // namespace mgb // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/dnn/dnn.sereg.h b/src/opr/impl/dnn/dnn.sereg.h index 396446cbaa241a1255d23ee971d5cbbbde948506..bdaa9b06789ebd05114a23bcc5dd0e4d316db41c 100644 --- a/src/opr/impl/dnn/dnn.sereg.h +++ b/src/opr/impl/dnn/dnn.sereg.h @@ -613,7 +613,6 @@ MGB_SEREG_OPR(LSQ, 4); MGB_SEREG_OPR(LSQBackward, 5); } // namespace opr - } // namespace mgb // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/imgproc.sereg.h b/src/opr/impl/imgproc.sereg.h index effacd3daa5d6c8c3f9de3c33e69436d50db6052..7dac473b708c72bfc8c5389d92f7322ef95723d8 100644 --- a/src/opr/impl/imgproc.sereg.h +++ b/src/opr/impl/imgproc.sereg.h @@ -196,7 +196,6 @@ using DctChannelSelectV1 = opr::DctChannelSelect; MGB_SEREG_OPR(DctChannelSelectV1, 0); } // namespace opr - } // namespace mgb // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/opr/impl/misc.sereg.h b/src/opr/impl/misc.sereg.h index 15fc8aaea158bbcb66f0e3c83f87bad5746cfa1c..9da06d1bf09d0488203adb05110f4189a031227b 100644 --- a/src/opr/impl/misc.sereg.h +++ b/src/opr/impl/misc.sereg.h @@ -57,7 +57,6 @@ namespace serialization { } // namespace serialization - namespace opr { MGB_SEREG_OPR(Argmax, 1); diff --git a/src/opr/impl/rand.sereg.h b/src/opr/impl/rand.sereg.h index 8c5c6c22abea01bea281578bae6548ffcac34f4f..68b16ea1f9130a77995f02e781e73e70c95d6baa 100644 --- a/src/opr/impl/rand.sereg.h 
+++ b/src/opr/impl/rand.sereg.h @@ -14,7 +14,6 @@ namespace mgb { - namespace opr { using UniformRNGV1 = opr::UniformRNG; diff --git a/src/opr/impl/tensor_manip.sereg.h b/src/opr/impl/tensor_manip.sereg.h index f64c1b57172f79c46f0603bfcd6071c2521b0cc4..db3c8d4fee0d49206c036c47ebb1f9a62dfcbece 100644 --- a/src/opr/impl/tensor_manip.sereg.h +++ b/src/opr/impl/tensor_manip.sereg.h @@ -120,7 +120,6 @@ namespace serialization { #endif } // namespace serialization - namespace opr { MGB_SEREG_OPR(Broadcast, 2); MGB_SEREG_OPR(Dimshuffle, 1); diff --git a/src/opr/test/dnn/convolution.cpp b/src/opr/test/dnn/convolution.cpp index 3fd50d80fe403537e26038bdd8aec2c246b9c820..88645a48e010f702bce0170de6d022145ebc36c0 100644 --- a/src/opr/test/dnn/convolution.cpp +++ b/src/opr/test/dnn/convolution.cpp @@ -2401,7 +2401,6 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) { worker0.join(); worker1.join(); } - #endif } // anonymous namespace diff --git a/src/serialization/impl/serializer.cpp b/src/serialization/impl/serializer.cpp index 504abdfe514bc5e3ea4690793936b1c72bccb01e..e24d6132e5ceb7f097b5ff5efc843d1860e90256 100644 --- a/src/serialization/impl/serializer.cpp +++ b/src/serialization/impl/serializer.cpp @@ -37,7 +37,6 @@ GraphLoader::shared_tensor_name_map() { } return ret; } - std::unique_ptr make_fbs_loader(std::unique_ptr file); std::unique_ptr make_fbs_dumper(std::unique_ptr file); bool is_fbs_file(InputFile& file); diff --git a/src/serialization/test/extern_c_opr.cpp b/src/serialization/test/extern_c_opr.cpp index dc153ff85f2bfaf489fd56bb4355cb5d03c8493c..351410d2362caba7b1303aa5ce39095cd44f14aa 100644 --- a/src/serialization/test/extern_c_opr.cpp +++ b/src/serialization/test/extern_c_opr.cpp @@ -502,5 +502,4 @@ TEST(TestExternCOpr, Dedup) { ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); } - // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fd4dde5066f25a678433e38bf2441525ee1b082c..2b13c9b45c91528802847586075f6b81ccb7ca19 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,6 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT) list(APPEND SOURCES ${SOURCES_}) endif() - add_executable(megbrain_test ${SOURCES}) target_link_libraries(megbrain_test gtest gmock) target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS}) diff --git a/tools/param_defs/mgb_opr_param_defs.py b/tools/param_defs/mgb_opr_param_defs.py index b32d2087d157b3af9a4f0e2108c08ccdbdf6882e..16adfcb00a9be3aa1004f2bfe7ff69875502767a 100644 --- a/tools/param_defs/mgb_opr_param_defs.py +++ b/tools/param_defs/mgb_opr_param_defs.py @@ -63,7 +63,6 @@ pdef('PersistentOutputStorage').add_fields( 'false') ) - (pdef('CollectiveComm', 'collective communication between multiple computing ' 'nodes on localhost') .add_enum(Doc('Mode', 'mode of collective communication'),