/**
 * \file src/core/include/megbrain/tensor.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include "megbrain/common.h"
#include "megbrain/comp_node.h"
#include "megbrain/dtype.h"
#include "megbrain/utils/metahelper.h"

#include "megdnn/basic_types.h"

#include <limits>
#include <memory>

namespace mgb {

using ::megdnn::TensorFormat;
using ::megdnn::TensorLayout;
using ::megdnn::TensorShape;

using ::megdnn::TensorFormatArray;
using ::megdnn::TensorLayoutArray;
using ::megdnn::TensorShapeArray;

/*!
 * \brief specify how a subtensor resides in a larger one
 */
class SubTensorSpec {
    TensorLayout m_layout;

    ptrdiff_t m_offset_elem = 0;

    SubTensorSpec(const TensorLayout& l, ptrdiff_t o) : m_layout{l}, m_offset_elem{o} {}

public:
    SubTensorSpec() = default;

    //! make a SubTensorSpec from given layout and zero offset
    static SubTensorSpec make_from_layout(const TensorLayout& layout) {
        return make_from_offset_elem(layout, 0);
    }

    //! make a SubTensorSpec from given layout and offset
    MGE_WIN_DECLSPEC_FUC static SubTensorSpec make_from_offset_elem(
            const TensorLayout& layout, ptrdiff_t offset_elem);

    //! get underlying layout
    const TensorLayout& layout() const { return m_layout; }

    //! get offset in number of logical elements in the layout
    ptrdiff_t offset_elem() const { return m_offset_elem; }

    //! get offset measured in bytes
    ptrdiff_t offset_byte() const {
        //! for lowbit cases, offset must be aligned to bytes
        mgb_assert(
                !m_layout.dtype.is_low_bit() ||
                !(m_offset_elem * m_layout.dtype.low_bit() % 8));
        return m_layout.dtype.size(m_offset_elem);
    }

    /*!
     * \brief merge with another SubTensorSpec: accumulate offset, and replace
     *      layout by rhs
     */
    MGE_WIN_DECLSPEC_FUC void merge_with(const SubTensorSpec& rhs);
};

/*!
 * \brief slice along some axis; index as in Python, with negative indices
 *      supported. A scalar index can also be represented as a Slice, where
 *      m_begin = idx, m_end = idx+1 and m_step = 1. The flag m_is_scalar_idx
 *      indicates whether the Slice comes from a scalar index.
 */
class Slice {
    Maybe<ptrdiff_t> m_begin, m_end, m_step;
    bool m_is_scalar_idx;

public:
    Slice(Maybe<ptrdiff_t> begin = None, Maybe<ptrdiff_t> end = None,
          Maybe<ptrdiff_t> step = None, bool is_scalar_idx = false)
            : m_begin{begin},
              m_end{end},
              m_step{step},
              m_is_scalar_idx{is_scalar_idx} {}

    /*!
     * \brief apply this slice on given tensor layout, and get corresponding
     *      subtensor
     * \param axis the axis to apply this slice; -1 can be used for
     *      flattened layout
     */
    MGE_WIN_DECLSPEC_FUC SubTensorSpec apply(TensorLayout layout, int axis) const;
};

template <class Trait>
class TensorStorage;

class DeviceTensorStorageTrait;
class HostTensorStorageTrait;

using HostTensorStorage = TensorStorage<HostTensorStorageTrait>;
using DeviceTensorStorage = TensorStorage<DeviceTensorStorageTrait>;
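
/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * how a Python-style slice maps to a SubTensorSpec. The layout used below is
 * an assumption for the sake of the example.
 *
 * \code
 *     TensorLayout layout{TensorShape{8, 4}, dtype::Float32{}};
 *     // rows 1, 3, 5, 7 of axis 0, i.e. layout[1::2] in Python notation
 *     SubTensorSpec spec = Slice{1, None, 2}.apply(layout, 0);
 *     // spec.layout().shape[0] == 4, spec.offset_elem() == 4 (one full row)
 * \endcode
 */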

/*!
 * \brief manager for raw tensor memory
 *
 * It contains no dtype information and all sizes are measured in bytes.
 *
 * Note that ensure_size() is lazy, and memory allocation only happens when
 * ptr() or sub() is called
 */
template <class Trait>
class TensorStorage {
public:
    using RawStorage = std::shared_ptr<dt_byte>;

    TensorStorage() = default;

    TensorStorage(CompNode comp_node) : m_comp_node(comp_node) {}

    TensorStorage(TensorStorage&&) noexcept = default;
    TensorStorage& operator=(TensorStorage&&) noexcept = default;

    TensorStorage(const TensorStorage& rhs) { *this = rhs; }

    MGE_WIN_DECLSPEC_FUC TensorStorage& operator=(const TensorStorage& rhs);

    /*!
     * \brief whether given tensor span is valid in this storage
     */
    bool valid_span(const TensorLayout::Span& span) const {
        return m_comp_node.valid() &&
               static_cast<ptrdiff_t>(m_offset) + span.low_byte >= 0 &&
               span.high_byte <= size();
    }

    /*!
     * \brief ensure that its space could hold at least sz bytes
     *
     * Note
     * 1. This method is lazy; size would only be changed when memory
     *    must be accessed.
     * 2. This method would only grow storage, but it would not release
     *    memory
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& ensure_size(size_t sz);

    /*!
     * \brief return a subtensor that shares the memory; the returned
     *      subtensor is not allowed to realloc
     * \param offset offset given in bytes
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage sub(ptrdiff_t offset) const;

    //! apply lazy resize and get ptr
    dt_byte* ptr() const {
        return const_cast<TensorStorage*>(this)->apply_lazy_and_get_ptr();
    }

    /*!
     * \brief usable size in bytes until end of allocated block
     */
    size_t size() const { return m_size; }

    /*!
     * \brief offset on allocated block in bytes
     */
    size_t offset() const { return m_offset; }

    //! get underlying comp node; error would be raised if it is invalid
    CompNode comp_node() const {
        check_comp_node_valid();
        return m_comp_node;
    }

    //! get underlying comp node and allow it to be invalid
    CompNode comp_node_allow_invalid() const { return m_comp_node; }

    /*!
     * \brief whether underlying comp_node is valid
     */
    bool comp_node_valid() const { return m_comp_node.valid(); }

    /*!
     * \brief whether this tensor has no valid element (either due to
     *      reaching end of mem chunk or no mem allocated)
     */
    bool empty() const { return !m_size; }

    /*!
     * \brief chain-style computing node setter
     *
     * note that if allow_mem_node_change is true and memory node is
     * changed, the underlying data would be released and this tensor would
     * become empty
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& comp_node(
            CompNode node, bool allow_mem_node_change = false);

    /*!
     * \brief copy from another TensorStorage, possibly of other storage
     *      type
     *
     * This storage must have been initialized
     *
     * \param size number of bytes to be copied; must not exceed size of
     *      this or src
     */
    template <class RTrait>
    MGE_WIN_DECLSPEC_FUC void copy_from(
            const TensorStorage<RTrait>& src, size_t size) const;

    /*!
     * \brief reset the tensor storage to given memory area
     */
    MGE_WIN_DECLSPEC_FUC void reset(CompNode node, size_t size, RawStorage data);

    /*!
     * \brief make a TensorStorage that shares memory with another
     *      TensorStorage of some different storage type
     *
     * This method can be used to convert between HostTensorStorage and
     * DeviceTensorStorage; \p src must be on CPU memory node.
     */
    template <
            class RTrait, typename = typename std::enable_if<
                                  !std::is_same<Trait, RTrait>::value>::type>
    MGE_WIN_DECLSPEC_FUC static TensorStorage make_proxy(
            const TensorStorage<RTrait>& src);
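
    /*
     * Illustrative sketch (editor's addition, not part of the original header):
     * make_proxy() lets a DeviceTensorStorage view the bytes owned by a
     * HostTensorStorage without copying; per the note above, the source must
     * reside on a CPU memory node. The comp node locator and size below are
     * assumptions.
     *
     * \code
     *     HostTensorStorage host{CompNode::load("cpu0")};
     *     host.ensure_size(1024);  // lazy: no allocation happens yet
     *     host.ptr();              // first access triggers the allocation
     *     auto dev = DeviceTensorStorage::make_proxy(host);  // shares memory
     * \endcode
     */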

    /*!
     * \brief make a DeviceTensorStorage on default_cpu
     *      that shares memory with this
     *
     * this must be a HostTensorStorage. Alignment is not checked.
     */
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<Trait, HostTensorStorageTrait>::value>>
    DeviceTensorStorage proxy_to_default_cpu() const {
        ptr();
        return {true, CompNode::default_cpu(), m_size, m_capacity, m_offset, m_data};
    }

    //! shortcut for raw_storage().use_count(), but won't trigger lazy alloc
    size_t use_count() const {
        if (m_size > m_capacity) {
            return 1;
        }
        return raw_storage().use_count();
    }

    //! whether current capacity is 0 (so we are waiting for lazy init)
    bool has_no_real_storage() const { return !m_capacity; }

    //! get underlying raw reference-counted storage
    const RawStorage& raw_storage() const {
        ptr();  // apply lazy resize
        return m_data;
    }

private:
    template <class T>
    friend class TensorStorage;

    bool m_allow_realloc = true;
    CompNode m_comp_node;

    //! current logical size; may exceed m_capacity and in such case memory
    //! would be allocated when ptr() is called
    size_t m_size = 0;

    //! usable size until end of allocated data block, excluding offset
    size_t m_capacity = 0;

    //! offset on m_data
    size_t m_offset = 0;

    RawStorage m_data;

    //! used internally for returning a predefined TensorStorage
    TensorStorage(
            bool allow_realloc, CompNode comp_node, size_t size, size_t capacity,
            size_t offset, const RawStorage& data)
            : m_allow_realloc(allow_realloc),
              m_comp_node(comp_node),
              m_size(size),
              m_capacity(capacity),
              m_offset(offset),
              m_data(data) {}

    void check_comp_node_valid() const {
        if (mgb_unlikely(!m_comp_node.valid()))
            on_invalid_comp_node();
    }

    MGE_WIN_DECLSPEC_FUC dt_byte* apply_lazy_and_get_ptr();

    [[noreturn]] MGE_WIN_DECLSPEC_FUC static void on_invalid_comp_node();
};

template <class TensorStorage>
class TensorND;

using HostTensorND = TensorND<HostTensorStorage>;
using DeviceTensorND = TensorND<DeviceTensorStorage>;

/*!
 * \brief n-dimensional tensor
 *
 * Note that TensorND is built on TensorStorage, which has some lazy behavior.
 */
template <class TensorStorage>
class TensorND {
    TensorStorage m_storage;
    TensorLayout m_layout;

public:
    using ChainReturnType = TensorND<TensorStorage>;

    MGE_WIN_DECLSPEC_FUC TensorND();

    MGE_WIN_DECLSPEC_FUC explicit TensorND(CompNode node);

    MGE_WIN_DECLSPEC_FUC explicit TensorND(DType dtype);

    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, DType dtype);

    //! allocate contiguous tensor
    MGE_WIN_DECLSPEC_FUC TensorND(
            CompNode node, const TensorShape& shape, DType dtype = dtype::Float32{},
            TensorFormat format = {});

    //! allocate contiguous tensor from given comp node and layout; layout
    //! is required to be contiguous, and its dtype and format would be used
    MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, const TensorLayout& layout);

    /* ================= shape and basic functionality ================= */

    //! get subtensor according to given slices
    MGE_WIN_DECLSPEC_FUC ChainReturnType operator[](
            std::initializer_list<Slice> slice) const;

    //! get subtensor according to spec
    MGE_WIN_DECLSPEC_FUC ChainReturnType sub(const SubTensorSpec& spec) const;

    //! whether underlying storage is empty
    bool empty() const { return m_storage.empty(); }

    //! whether tensor shape is valid (i.e. ndim != 0)
    bool shape_valid() const { return m_layout.ndim; }

    const TensorShape& shape() const { return m_layout; }

    const TensorLayout& layout() const { return m_layout; }

    //! shape at given dimension, with boundary check
    size_t shape(size_t dim) const {
        mgb_assert(dim < m_layout.ndim);
        return m_layout.shape[dim];
    }
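
    /*
     * Illustrative sketch (editor's addition, not part of the original header):
     * operator[] takes one Slice per leading axis and yields a subtensor that
     * shares storage, mirroring Python indexing such as t[1:3, :, ::2]. The
     * comp node locator and shape below are assumptions.
     *
     * \code
     *     HostTensorND t{CompNode::load("cpu0"), TensorShape{4, 5, 6}};
     *     auto view = t[{Slice{1, 3}, Slice{}, Slice{None, None, 2}}];
     *     // view.shape() == {2, 5, 3}; no data is copied
     * \endcode
     */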

    //! get ptr at given index
    template <typename T, typename Iter>
    T* ptr(Iter idx_begin, Iter idx_end) {
        auto ptr = this->template ptr<T>();
        size_t nidx = 0;
        while (idx_begin != idx_end) {
            mgb_assert(nidx < m_layout.ndim);
            size_t idx = *idx_begin;
            mgb_assert(idx < m_layout.shape[nidx]);
            ptr += m_layout.stride[nidx] * idx;

            ++idx_begin;
            ++nidx;
        }
        return ptr;
    }

    template <typename T>
    T* ptr(std::initializer_list<size_t> idx) {
        return ptr<T>(idx.begin(), idx.end());
    }

    template <typename T>
    const T* ptr(std::initializer_list<size_t> dim) const {
        return const_cast<TensorND&>(*this).ptr<T>(dim);
    }

    //! get ptr of buffer start; *T* must match dtype
    template <typename T>
    T* ptr() const {
        m_layout.dtype.assert_is_ctype<T>();
        return m_storage.ptr()->template as<T>();
    }

    dt_byte* raw_ptr() const { return m_storage.ptr(); }

    /*!
     * \brief change the shape without retaining old data, and initialize as
     *      contiguous stride
     *
     * dtype and format would not be changed
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& resize(const TensorShape& shape);

    /*!
     * \brief totally reset the tensor to given storage and layout
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& reset(
            TensorStorage storage, const TensorLayout& layout);

    /* ================= getter and setters ================= */

    /*!
     * \brief change comp node; see TensorStorage::comp_node()
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& comp_node(
            CompNode comp_node, bool allow_mem_node_change = false);

    CompNode comp_node() const { return m_storage.comp_node(); }

    const TensorStorage& storage() const { return m_storage; }

    /*!
     * \brief change the storage and invalidate all data, resulting in an
     *      empty tensor
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& storage(const TensorStorage& storage);

    //! get data type
    DType dtype() const { return m_layout.dtype; }

    //! get tensor format
    TensorFormat format() const { return m_layout.format; }

    /*!
     * \brief change underlying dtype
     *
     * layout would be cleared (reset to ndim=0) if dtype actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& dtype(DType dtype);

    /*!
     * \brief change underlying tensor format
     *
     * layout would be cleared (reset to ndim=0) if format actually changes
     */
    MGE_WIN_DECLSPEC_FUC ChainReturnType& format(TensorFormat format);

    /*!
     * \brief copy from another tensor and initialize contiguous layout
     *
     * Note:
     * 1. If the computing node is empty, it would be copied from src
     * 2. To copy from device to host, if the two tensors reside on
     *    different computing nodes, the caller is responsible to perform
     *    sync before copying; a better way is to set empty computing node
     *    to host tensor.
     * 3. For cross-device copy: copy would be synced on comp node of this,
     *    and the caller is responsible to sync this comp node with src comp
     *    node.
     * 4. If dtype is valid, it would be checked to match the dtype of src.
     * 5. Format would be reset to default and layout would be initialized
     *    to be contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC ChainReturnType& copy_from(const TensorND<RStorage>& src);

    /*!
     * \brief copy from another tensor of the same shape, retaining current
     *      layout
     *
     * If storage type of src and this are different and src is not
     * contiguous, a temporary storage would be allocated to first make src
     * contiguous.
     */
    template <class RStorage>
    MGE_WIN_DECLSPEC_FUC const ChainReturnType& copy_from_fixlayout(
            const TensorND<RStorage>& src) const;

    //! non-const version of copy_from_fixlayout
    template <class RStorage>
    ChainReturnType& copy_from_fixlayout(const TensorND<RStorage>& src) {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->copy_from_fixlayout(src));
    }

    //! convert to megdnn::TensorND
    megdnn::TensorND as_megdnn() const {
        return {const_cast<void*>(static_cast<const void*>(raw_ptr())), m_layout};
    }

    /* ================= misc ================= */
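
    /*
     * Illustrative sketch (editor's addition, not part of the original header):
     * a typical host -> device -> host round trip built on copy_from() and
     * copy_from_fixlayout(); the comp node locator and shape are assumptions.
     *
     * \code
     *     HostTensorND host{CompNode::load("xpu0"), TensorShape{16}};
     *     DeviceTensorND dev;
     *     dev.copy_from(host);                   // comp node and layout taken from host
     *     host.copy_from_fixlayout(dev).sync();  // keep host layout; sync before reading
     * \endcode
     */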

    /*!
     * \brief block host thread to synchronize with the CompNode
     */
    const ChainReturnType& sync() const {
        comp_node().sync();
        return static_cast<const ChainReturnType&>(*this);
    }

    ChainReturnType& sync() {
        return const_cast<ChainReturnType&>(
                static_cast<const ChainReturnType*>(this)->sync());
    }

    //! similar to TensorStorage<>::make_proxy
    template <
            class RStorage,
            typename = typename std::enable_if<
                    !std::is_same<TensorStorage, RStorage>::value>::type>
    static ChainReturnType make_proxy(const TensorND<RStorage>& src) {
        ChainReturnType ret;
        ret.reset(TensorStorage::make_proxy(src.storage()), src.layout());
        return ret;
    }

    //! similar to HostTensorStorage::proxy_to_default_cpu
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    DeviceTensorND proxy_to_default_cpu() const {
        DeviceTensorND ret;
        ret.reset(storage().proxy_to_default_cpu(), layout());
        return ret;
    }

    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<TensorStorage, HostTensorStorage>::value>>
    HostTensorND proxy_to_comp_node(CompNode cn) const {
        HostTensorStorage host_storage;
        host_storage.reset(cn, m_storage.size(), m_storage.raw_storage());
        HostTensorND ret;
        ret.reset(host_storage, m_layout);
        return ret;
    }
};

/*!
 * \brief call memset in the data of a device tensor
 */
MGE_WIN_DECLSPEC_FUC void dev_tensor_memset(const DeviceTensorND& tensor, int val);

/*!
 * \brief fill zeros in the content of a dev tensor
 */
static inline void fill_zero_dev_tensor(const DeviceTensorND& tensor) {
    dev_tensor_memset(tensor, 0);
}

}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}