/**
 * \file dnn/src/common/utils.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#pragma once

#include "megdnn/arch.h"
#include "megdnn/basic_types.h"
#include "megdnn/dtype.h"
#include "megdnn/handle.h"
#include "megdnn/thin/small_vector.h"
#include "megdnn/oprs/general.h"
#include "src/common/hash_ct.h"
#include "src/common/utils.cuh"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

#if defined(_WIN32)
#include <intrin.h>
#endif

#if __cplusplus >= 201703L || __clang_major__ >= 4
#define MEGDNN_FALLTHRU [[fallthrough]];
#elif __GNUC__ >= 7
#define MEGDNN_FALLTHRU __attribute__((fallthrough));
#else
#define MEGDNN_FALLTHRU
#endif

#define rep(i, n) for (auto i = decltype(n){0}; i < (n); ++i)
#define rep_step(i, n, step) for (auto i = decltype(n){0}; i < (n); i += (step))

#define megdnn_assert_contiguous(layout)                              \
    do {                                                              \
        megdnn_assert((layout).is_contiguous(), "%s is %s.", #layout, \
                      (layout).to_string().c_str());                  \
    } while (0)

#define megdnn_assert_non_overlapping_strong(layout)                     \
    do {                                                                 \
        megdnn_assert((layout).is_non_overlapping_strong(), "%s is %s.", \
                      #layout, (layout).to_string().c_str());            \
    } while (0)

#define megdnn_assert_eq_size_t(lhs_, rhs_)                                   \
    do {                                                                      \
        size_t lhs = lhs_, rhs = rhs_;                                        \
        megdnn_assert(lhs == rhs, "%s is %zu, %s is %zu.", #lhs_, lhs, #rhs_, \
                      rhs);                                                   \
    } while (0)

#define megdnn_assert_eq_layout(lhs, rhs)                              \
    do {                                                               \
        megdnn_assert(lhs.eq_layout(rhs), "%s is %s, %s is %s.", #lhs, \
                      lhs.to_string().c_str(), #rhs,                   \
                      rhs.to_string().c_str());                        \
    } while (0)

#define megdnn_assert_eq_shape(lhs, rhs)                              \
    do {                                                              \
        megdnn_assert(lhs.eq_shape(rhs), "%s is %s, %s is %s.", #lhs, \
                      lhs.to_string().c_str(), #rhs,                  \
                      rhs.to_string().c_str());                       \
    } while (0)

#define megdnn_assert_eq_dtype(lhs, rhs)                                   \
    do {                                                                   \
        megdnn_assert(lhs.dtype == rhs.dtype, "%s is %s, %s is %s.", #lhs, \
                      lhs.dtype.name(), #rhs, rhs.dtype.name());           \
    } while (0)

#define megdnn_layout_msg(layout) \
    std::string(megdnn_mangle(#layout "=" + (layout).to_string()))

#define MEGDNN_LOCK_GUARD(var) \
    std::lock_guard<decltype(var)> _lock_guard_##var { var }

namespace megdnn {

/* ================ logging ================ */

#define megdnn_log_debug(fmt...) \
    _megdnn_do_log(::megdnn::LogLevel::DEBUG, __FILE__, __func__, __LINE__, fmt)
#define megdnn_log(fmt...) \
    _megdnn_do_log(::megdnn::LogLevel::INFO, __FILE__, __func__, __LINE__, fmt)
#define megdnn_log_warn(fmt...) \
    _megdnn_do_log(::megdnn::LogLevel::WARN, __FILE__, __func__, __LINE__, fmt)
#define megdnn_log_error(fmt...) \
    _megdnn_do_log(::megdnn::LogLevel::ERROR, __FILE__, __func__, __LINE__, fmt)

#if MEGDNN_ENABLE_LOGGING
void __log__(LogLevel level, const char* file, const char* func, int line,
             const char* fmt, ...) __attribute__((format(printf, 5, 6)));
#define _megdnn_do_log ::megdnn::__log__
#else
#define _megdnn_do_log(...) \
    do {                    \
    } while (0)
#endif  // MEGDNN_ENABLE_LOGGING
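/*
 * Usage sketch for the helpers above (illustrative only; `g_demo_mutex` and
 * `sum_demo` are hypothetical names, not part of this header):
 *
 *   static std::mutex g_demo_mutex;
 *   size_t sum_demo(const size_t* data, size_t n) {
 *       MEGDNN_LOCK_GUARD(g_demo_mutex);   // scoped std::lock_guard on the mutex
 *       size_t sum = 0;
 *       rep(i, n) sum += data[i];          // i iterates over [0, n)
 *       megdnn_log_debug("summed %zu elements", n);  // no-op unless
 *                                                    // MEGDNN_ENABLE_LOGGING
 *       return sum;
 *   }
 */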
/* helper functions */

/**
 * \brief Get the next `stride' index lexicographically.
 *
 * stride must be divisible by the last dimension shape.
 *
 * \return true if the index is updated successfully, false otherwise (the
 *      index is already the last one, so the next index does not exist)
 */
bool get_next_addr(size_t* index, const size_t* shape, size_t n,
                   size_t stride = 1);
size_t get_linear_addr(size_t* index, const size_t* shape, size_t n);
int get_linear_addr_noncont(size_t* index, const TensorLayout& layout);

size_t infer_conv_shape(size_t inp, size_t flt, size_t stride, size_t pad,
                        bool is_floor = true);
void infer_conv_shape2d(size_t ih, size_t iw, size_t fh, size_t fw, size_t sh,
                        size_t sw, size_t ph, size_t pw, size_t& oh, size_t& ow,
                        bool is_floor = true);

template <typename T, typename Func>
SmallVector<T> apply_vector(Func&& func, const SmallVector<T>& vec);

std::string ssprintf(const char* fmt, ...)
        __attribute__((format(printf, 1, 2)));

/*!
 * \brief transpose an (m*n) matrix into an (n*m) matrix
 *
 * -1 in \p lds and \p ldd means default leading dimensions (= nr. columns)
 *
 * Note that transpose and transpose_knc2nsck are implemented in x86/utils.cpp
 * and arm_common/util.cpp, depending on the target platform.
 */
template <typename dtype>
void transpose(const dtype* src, dtype* dst, size_t m, size_t n,
               ptrdiff_t lds = -1, ptrdiff_t ldd = -1);

/*!
 * transpose src with contiguous layout (k, n, c) into dst with shape
 * (n, c, k), with given stride (\p n_stride) on the first dimension
 */
template <typename dtype>
void transpose_knc2nsck(const dtype* src, dtype* dst, size_t k, size_t n,
                        size_t c, size_t n_stride);

/*!
 * \brief integer division with the result ceiled; both dividend and divisor
 *      should be non-negative
 */
template <typename int_t>
int_t div_ceil(int_t dividend, int_t divisor);

/*!
 * \brief integer division with the result floored; both dividend and divisor
 *      should be non-negative
 */
template <typename int_t>
int_t div_floor(int_t dividend, int_t divisor);

/*!
 * \brief get the geometric mean of a and b
 */
inline dt_float32 geometric_mean(dt_float32 a, dt_float32 b) {
    return std::sqrt(a * b);
}

/*!
 * \brief calculate x*x
 */
template <typename num_t>
num_t sqr(num_t x) {
    return x * x;
}

template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/**
 * \brief Aligned workspace bundle.
 *
 * Each individual workspace is aligned to align_in_bytes.
 */
class WorkspaceBundle {
public:
    WorkspaceBundle(void* ptr = nullptr,
                    SmallVector<size_t> sizes_in_bytes = {},
                    size_t align_in_bytes = 512);
    /**
     * construct a 2D workspace bundle
     */
    WorkspaceBundle(SmallVector<SmallVector<size_t>> vector_sizes_in_bytes,
                    void* ptr, size_t align_in_bytes = 512);
    /**
     * \returns raw workspace ptr.
     *
     * Note that ptr() is different from get(0), in that
     * the result of ptr() is possibly not aligned.
     */
    void* ptr() const;
    /**
     * \returns the 2D [dim1, dim0] workspace ptr (aligned)
     */
    void* get(size_t dim1, size_t dim0) const;
    /**
     * \returns the 1D [dim0] workspace ptr (aligned)
     */
    void* get(size_t dim0) const;
    /**
     * \returns total size, taking into account the padding needed to satisfy
     *      the alignment requirement.
     */
    size_t total_size_in_bytes() const;
    /**
     * \return the 2D [dim1, dim0] workspace size
     */
    size_t get_size(size_t dim1, size_t dim0) const;
    /**
     * \return the 1D [dim0] workspace size
     */
    size_t get_size(size_t dim0) const;
    size_t nr_workspace() const;
    void set(void* ptr);

    Workspace get_workspace(size_t dim1, size_t dim0) const {
        return {static_cast<dt_byte*>(get(dim1, dim0)), get_size(dim1, dim0)};
    }

    Workspace get_workspace(size_t dim0) const {
        return {static_cast<dt_byte*>(get(dim0)), get_size(dim0)};
    }

private:
    void* m_ptr;
    SmallVector<SmallVector<size_t>> m_sizes;
    SmallVector<size_t> m_aligned_sizes;
    //! all workspace size prefix sum
    SmallVector<size_t> m_reduce_sizes;
    //! dim1 workspace number prefix sum
    SmallVector<size_t> m_reduce_num;
    size_t m_align_in_bytes;
};
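/*
 * Usage sketch for WorkspaceBundle (illustrative; the byte sizes and the
 * `some_allocator` call are hypothetical):
 *
 *   // three sub-workspaces of 100, 200 and 300 bytes, each aligned to 512
 *   WorkspaceBundle bundle{nullptr, {100, 200, 300}};
 *   size_t total = bundle.total_size_in_bytes();  // includes alignment padding
 *   void* raw_ws = some_allocator(total);         // caller-provided storage
 *   bundle.set(raw_ws);
 *   void* ws1 = bundle.get(1);                    // aligned ptr of the 200-byte chunk
 *   Workspace w2 = bundle.get_workspace(2);       // {ptr, size} of the 300-byte chunk
 */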
MEGDNN_CONSTEXPR std::size_t operator"" _z(unsigned long long n) {
    return n;
}

constexpr uint32_t operator"" _hash(char const* str, size_t count) {
    return XXHash64CT::hash(str, count, 20160701);
}

template <typename Vec>
std::string vec2str(Vec&& vec) {
    std::string res;
    res.append("{");
    for (size_t i = 0; i < vec.size(); ++i) {
        res.append(std::to_string(vec[i]));
        if (i + 1 < vec.size())
            res.append(",");
    }
    res.append("}");
    return res;
}

// facilitate tile and repeat
size_t count_not_ones_in_shape(const TensorShape& shape);

/*!
 * \brief whether a TensorLayout is of NHWC format and contiguous on the W and
 *      C dimensions.
 *
 * if true, it implies that a TensorND with the given layout is convertible to
 * a Mat for the use of cv algorithms.
 */
bool is_nhwc_contig_wc(const TensorLayout& layout);

static inline void copy_plane_in_bytes(void* dst, const void* src,
                                       size_t height, size_t width,
                                       size_t stride_dst, size_t stride_src) {
    for (size_t h = 0; h < height; ++h) {
        std::memcpy(static_cast<char*>(dst) + h * stride_dst,
                    static_cast<const char*>(src) + h * stride_src, width);
    }
}

megcoreDeviceHandle_t get_device_handle(Handle* handle);

static inline void incr_voidp(void*& ptr, ptrdiff_t delta) {
    ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) + delta);
}

/*!
 * \brief align *val* to be a multiple of *align*
 * \param align required alignment, which must be a power of 2
 */
template <typename T>
static inline T get_aligned_power2(T val, T align) {
    auto d = val & (align - 1);
    val += (align - d) & (align - 1);
    return val;
}

template <typename T, typename S>
inline T saturate(S x, S lower, S upper) {
    //! in(nan) -> out(lower) :
    //! match the meaning with fmax(in dtype.h) when dealing with nan
    S val = x > upper ? upper : (x >= lower ? x : lower);
    return static_cast<T>(val);
}

/*!
 * \brief integer division with the result ceiled; both dividend and divisor
 *      should be non-negative
 */
template <typename int_t>
int_t div_ceil(int_t dividend, int_t divisor) {
    static_assert(std::is_integral<int_t>::value, "must be integers");
    megdnn_assert_internal(dividend >= 0);
    megdnn_assert_internal(divisor > 0);
    return (dividend + divisor - 1) / divisor;
}

/*!
 * \brief integer division with the result floored; both dividend and divisor
 *      should be non-negative
 */
template <typename int_t>
int_t div_floor(int_t dividend, int_t divisor) {
    static_assert(std::is_integral<int_t>::value, "must be integers");
    megdnn_assert_internal(dividend >= 0);
    megdnn_assert_internal(divisor > 0);
    return dividend / divisor;
}

/*!
 * \brief round dividend up to a multiple of divisor; both dividend and divisor
 *      should be non-negative
 */
template <typename int_t>
int_t round_up(int_t dividend, int_t divisor) {
    static_assert(std::is_integral<int_t>::value, "must be integers");
    megdnn_assert_internal(dividend >= 0);
    megdnn_assert_internal(divisor > 0);
    return ((dividend + divisor - 1) / divisor) * divisor;
}

template <typename T, typename Func>
SmallVector<T> apply_vector(Func&& func, const SmallVector<T>& vec) {
    SmallVector<T> res(vec.size());
    std::transform(vec.begin(), vec.end(), res.begin(), func);
    return res;
}
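/*
 * Worked example for the integer helpers above (illustrative values):
 *
 *   size_t blocks  = div_ceil<size_t>(70, 32);   // -> 3   (ceil of 70 / 32)
 *   size_t aligned = round_up<size_t>(70, 32);   // -> 96  (3 * 32)
 *   size_t tiles   = div_floor<size_t>(70, 32);  // -> 2
 *
 *   // apply_vector maps a callable over a SmallVector element-wise:
 *   SmallVector<size_t> sizes{1, 2, 3};
 *   auto doubled = apply_vector<size_t>([](size_t v) { return v * 2; }, sizes);
 */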
template <typename T>
struct SafeMultiplies;

template <typename T>
struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> {
    static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8;

    static size_t clz(unsigned x) {
        size_t n;
#if defined(_MSC_VER)
        DWORD leading_zero;
        _BitScanReverse(&leading_zero, x);
        n = 31 - leading_zero;
#else
        n = __builtin_clz(x);
#endif
        return x ? n : nbits;
    }

    static size_t clz(unsigned long x) {
        size_t n;
#if defined(_MSC_VER)
        DWORD leading_zero;
        _BitScanReverse(&leading_zero, x);
        n = 31 - leading_zero;
#else
        n = __builtin_clzl(x);
#endif
        return x ? n : nbits;
    }

    static size_t clz(unsigned long long x) {
        size_t n;
#if defined(_MSC_VER)
        DWORD leading_zero;
        _BitScanReverse64(&leading_zero, x);
        n = 63 - leading_zero;
#else
        n = __builtin_clzll(x);
#endif
        return x ? n : nbits;
    }

    T operator()(const T& x, const T& y) const {
        int overflow = clz(x) + clz(y) + 2 <= nbits;
        T t = x * (y >> 1);
        // clz(x)+clz(y/2) >= nbits, t must not overflow
        overflow |= t >> (nbits - 1);
        t <<= 1;
        auto yodd = y & 1;
        t += yodd ? x : 0;
        overflow |= yodd & (t < x);
        megdnn_assert(!overflow, "multiply overflow: %s %s",
                      std::to_string(x).c_str(), std::to_string(y).c_str());
        return t;
    }

    template <typename U, typename V>
    U operator()(const U&, const V&) const {
        static_assert(  // can not be true
                std::is_same<U, T>::value && std::is_same<V, T>::value,
                "implicit conversion disallowed in SafeMultiplies");
        megdnn_trap();
    }
};

template <>
struct SafeMultiplies<size_t> : public _SafeMultipliesImplUnsigned<size_t> {};

template <typename T>
bool vec_contains(const std::vector<T>& vec, const T& elem) {
    return std::find(vec.begin(), vec.end(), elem) != vec.end();
}

template <typename T>
bool vec_contains(const SmallVector<T>& vec, const T& elem) {
    return std::find(vec.begin(), vec.end(), elem) != vec.end();
}

float mul_scale(DType lhs, DType rhs);

template <typename stype, typename dtype>
dtype convert(stype src, dtype dst, size_t offset);

template <>
uint8_t convert(dt_quint4 src, uint8_t dst, size_t offset);

template <>
dt_quint4 convert(uint8_t src, dt_quint4 dst, size_t offset);

template <>
int8_t convert(dt_qint4 src, int8_t dst, size_t offset);

template <>
dt_qint4 convert(int8_t src, dt_qint4 dst, size_t offset);

/*!
 * \brief check whether two floats are equal within the given ULP (unit in the
 *      last place)
 */
template <class T>
static inline
        typename std::enable_if<!std::numeric_limits<T>::is_integer, bool>::type
        almost_equal(T x, T y, int unit_last_place = 1) {
    return std::abs(x - y) < (std::numeric_limits<T>::epsilon() *
                              std::abs(x + y) * unit_last_place) ||
           std::abs(x - y) < std::numeric_limits<T>::min();
}

bool dtype_almost_equal(DType lhs, DType rhs);
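/*
 * Usage sketch for SafeMultiplies and almost_equal (illustrative; `batch` and
 * `channel_stride` are hypothetical size_t variables):
 *
 *   // overflow-checked size computation: asserts instead of silently wrapping
 *   size_t bytes = SafeMultiplies<size_t>()(batch, channel_stride);
 *
 *   // ULP-based float comparison (non-integer types only)
 *   bool close = almost_equal(1.0f, 1.0f + 1e-7f);  // true within 1 ULP bound
 */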
/**
 * \brief N-dimensional index space
 */
class CpuNDRange {
    static MEGDNN_CONSTEXPR size_t MAX_NDIM = MEGDNN_MAX_NDIM;

private:
    size_t m_dim[MAX_NDIM];
    size_t m_dimension;

public:
    //! \brief Constructs a seven-dimensional range.
    CpuNDRange(size_t size0, size_t size1, size_t size2, size_t size3,
               size_t size4, size_t size5, size_t size6)
            : m_dimension(7) {
        m_dim[0] = size0;
        m_dim[1] = size1;
        m_dim[2] = size2;
        m_dim[3] = size3;
        m_dim[4] = size4;
        m_dim[5] = size5;
        m_dim[6] = size6;
    }

    //! \brief Constructs a range with zero dimensions.
    CpuNDRange() : CpuNDRange(1, 1, 1, 1, 1, 1, 1) { m_dimension = 0; }

    //! \brief Constructs a one-dimensional range.
    CpuNDRange(size_t size0) : CpuNDRange(size0, 1, 1, 1, 1, 1, 1) {
        m_dimension = 1;
    }

    //! \brief Constructs a two-dimensional range.
    CpuNDRange(size_t size0, size_t size1)
            : CpuNDRange(size0, size1, 1, 1, 1, 1, 1) {
        m_dimension = 2;
    }

    //! \brief Constructs a three-dimensional range.
    CpuNDRange(size_t size0, size_t size1, size_t size2)
            : CpuNDRange(size0, size1, size2, 1, 1, 1, 1) {
        m_dimension = 3;
    }

    //! \brief Constructs a four-dimensional range.
    CpuNDRange(size_t size0, size_t size1, size_t size2, size_t size3)
            : CpuNDRange(size0, size1, size2, size3, 1, 1, 1) {
        m_dimension = 4;
    }

    //! \brief Constructs a five-dimensional range.
    CpuNDRange(size_t size0, size_t size1, size_t size2, size_t size3,
               size_t size4)
            : CpuNDRange(size0, size1, size2, size3, size4, 1, 1) {
        m_dimension = 5;
    }

    //! \brief Constructs a six-dimensional range.
    CpuNDRange(size_t size0, size_t size1, size_t size2, size_t size3,
               size_t size4, size_t size5)
            : CpuNDRange(size0, size1, size2, size3, size4, size5, 1) {
        m_dimension = 6;
    }

    //! \brief Constructs every dim from a linear global index
    CpuNDRange(const CpuNDRange& dims, size_t global) {
        m_dimension = dims.dimension();
        for (int i = m_dimension - 1; i >= 0; i--) {
            m_dim[i] = global % dims[i];
            global /= dims[i];
        }
    }

    //! \brief Queries the number of dimensions in the range.
    size_t dimension() const { return m_dimension; }

    //! \brief Returns the size of the object in bytes based on the
    //! runtime number of dimensions
    size_t size() const { return m_dimension * sizeof(size_t); }

    size_t* get() { return m_dimension ? m_dim : nullptr; }

    size_t& operator[](size_t idx);

    size_t& operator[](size_t idx) const {
        return const_cast<CpuNDRange*>(this)->operator[](idx);
    };

    const size_t* get() const { return const_cast<CpuNDRange*>(this)->get(); }

    size_t total_size() const {
        size_t ret = 1;
        for (size_t i = 0; i < m_dimension; i++) {
            ret *= m_dim[i];
        }
        return ret;
    }

    //! \brief get the dims string
    std::string to_string() const;
};

/*!
 * \brief helpers for oprs using typecvt between comp_type and dst_type
 * \tparam SrcType src type
 * \tparam CompType compute type, such as fp32 for conv
 * \tparam DstType dst type
 */
template <typename SrcType, typename CompType, typename DstType>
struct CompTypeCvter {
    std::unique_ptr<TypeCvt> m_cvt_opr;
    WorkspaceBundle* m_workspace_bundle;
    size_t m_workspace_idx;

    CompTypeCvter(Handle* handle, WorkspaceBundle* bundle)
            : m_workspace_bundle(bundle), m_workspace_idx(0) {
        megdnn_assert(
                (DTypeTrait<SrcType>::enumv != DTypeTrait<CompType>::enumv &&
                 DTypeTrait<DstType>::enumv != DTypeTrait<CompType>::enumv),
                "SrcType(%s) == CompType(%s) or DstType(%s) == CompType(%s) is "
                "not supported.",
                SrcType().name(), CompType().name(), DstType().name(),
                CompType().name());
        m_cvt_opr = handle->create_operator<TypeCvt>();
    }

    //! Convert tensor dtype from SrcType to CompType.
    CompTypeCvter& src_to_comp_type(const TensorND& src, TensorND& comp) {
        if (src.layout.dtype.enumv() == DTypeTrait<SrcType>::enumv) {
            if (!comp.layout.dtype.valid() ||
                comp.layout.dtype.enumv() != DTypeTrait<CompType>::enumv) {
                comp.layout.dtype = CompType();
                comp.layout.init_contiguous_stride();
                comp.raw_ptr = m_workspace_bundle->get(m_workspace_idx++);
                if (src.layout.ndim) {
                    m_cvt_opr->exec(src, comp);
                }
            }
        }
        return *this;
    }

    //! Convert tensor dtype from CompType to DstType.
    CompTypeCvter& comp_to_dst_type(const TensorND& comp, const TensorND& dst) {
        megdnn_assert(comp.layout.dtype.enumv() == DTypeTrait<CompType>::enumv);
        if (dst.layout.dtype.enumv() == DTypeTrait<DstType>::enumv) {
            m_cvt_opr->exec(comp, dst);
        }
        return *this;
    }

    Workspace workspace() {
        return m_workspace_bundle->get_workspace(m_workspace_idx);
    }
};

}  // namespace megdnn

// vim: syntax=cpp.doxygen