#include "lite_build_config.h" #if LITE_BUILD_WITH_MGE #include "common.h" #include "tensor_impl.h" #include "lite/tensor.h" #include "megbrain/comp_node.h" #include "megbrain/tensor.h" #include using namespace lite; /**********************TensorImpl****************************/ LITE_DYN_TYPE_OBJ_FINAL_IMPL(TensorImplDft); TensorImplDft::TensorImplDft() { m_host_tensor = std::make_shared(mgb::CompNode::default_cpu()); } TensorImplDft::TensorImplDft(LiteDeviceType device, bool is_pinned_host) { auto cn = mgb::CompNode::load(to_compnode_locator(device)); if (device == LiteDeviceType::LITE_DEVICE_DEFAULT) { device = LiteDeviceType::LITE_CPU; } if (device == LiteDeviceType::LITE_CPU) { m_host_tensor = std::make_shared(mgb::CompNode::default_cpu()); } else if (is_pinned_host) { m_host_tensor = std::make_shared(cn); } else { m_dev_tensor = std::make_shared(cn); } } TensorImplDft::TensorImplDft( LiteDeviceType device, const Layout& layout, bool is_pinned_host) { auto cn = mgb::CompNode::load(to_compnode_locator(device)); auto mge_layout = to_impl_layout(layout); if (device == LiteDeviceType::LITE_DEVICE_DEFAULT) { device = LiteDeviceType::LITE_CPU; } if (device == LiteDeviceType::LITE_CPU) { m_host_tensor = std::make_shared( mgb::CompNode::default_cpu(), mge_layout); } else if (is_pinned_host) { m_host_tensor = std::make_shared(cn, mge_layout); } else { m_dev_tensor = std::make_shared(cn, mge_layout); } } TensorImplDft::TensorImplDft( int device_id, LiteDeviceType device_type, const Layout& layout, bool is_pinned_host) { auto locator = to_compnode_locator(device_type); locator.device = device_id; auto cn = mgb::CompNode::load(locator); if (device_type == LiteDeviceType::LITE_DEVICE_DEFAULT) { device_type = LiteDeviceType::LITE_CPU; } if (layout.ndim) { auto mge_layout = to_impl_layout(layout); if (device_type == LiteDeviceType::LITE_CPU) { m_host_tensor = std::make_shared( mgb::CompNode::default_cpu(), mge_layout); } else if (is_pinned_host) { m_host_tensor = std::make_shared(cn, mge_layout); } else { m_dev_tensor = std::make_shared(cn, mge_layout); } } else { if (device_type == LiteDeviceType::LITE_CPU) { m_host_tensor = std::make_shared(mgb::CompNode::default_cpu()); } else if (is_pinned_host) { m_host_tensor = std::make_shared(cn); } else { m_dev_tensor = std::make_shared(cn); } } } TensorImplDft::TensorImplDft( int device_id, int stream_id, LiteDeviceType device_type, bool is_pinned_host) { auto locator = to_compnode_locator(device_type); locator.device = device_id; locator.stream = stream_id; auto cn = mgb::CompNode::load(locator); if (get_device_from_locator(locator) == LiteDeviceType::LITE_CPU) { m_host_tensor = std::make_shared(mgb::CompNode::default_cpu()); } else if (is_pinned_host) { m_host_tensor = std::make_shared(cn); } else { m_dev_tensor = std::make_shared(cn); } } LiteDeviceType TensorImplDft::get_device_type() const { if (is_host()) { return get_device_from_locator(m_host_tensor->comp_node().locator()); } else { return get_device_from_locator(m_dev_tensor->comp_node().locator()); } } int TensorImplDft::get_device_id() const { if (is_host()) { return m_host_tensor->comp_node().locator().device; } else { return m_dev_tensor->comp_node().locator().device; } } bool TensorImplDft::is_pinned_host() const { return is_host() && get_device_from_locator(m_host_tensor->comp_node().locator()) != LiteDeviceType::LITE_CPU; } void TensorImplDft::set_mge_tensor_compnode(const mgb::CompNode& comp_node) { if (is_host()) { m_host_tensor->comp_node(comp_node, true); } else { 
void TensorImplDft::set_mge_tensor_compnode(const mgb::CompNode& comp_node) {
    if (is_host()) {
        m_host_tensor->comp_node(comp_node, true);
    } else {
        m_dev_tensor->comp_node(comp_node, true);
    }
}

Layout TensorImplDft::get_layout() const {
    if (is_host()) {
        return to_lite_layout(m_host_tensor->layout());
    } else {
        return to_lite_layout(m_dev_tensor->layout());
    }
}

void* TensorImplDft::get_memory_ptr() const {
    if (m_get_memory_callback) {
        m_get_memory_callback(const_cast<TensorImplDft*>(this));
    }
    if (is_host()) {
        return static_cast<void*>(m_host_tensor->raw_ptr());
    } else {
        return static_cast<void*>(m_dev_tensor->raw_ptr());
    }
}

void* TensorImplDft::get_memory_ptr(const std::vector<size_t>& idx) const {
    if (m_get_memory_callback) {
        m_get_memory_callback(const_cast<TensorImplDft*>(this));
    }
    //! dispatch on log2(element size): 0 -> 1-byte, 1 -> 2-byte, 2 -> 4-byte
    if (is_host()) {
        auto elemsize_log = m_host_tensor->layout().dtype.size_log();
        switch (elemsize_log) {
            case 0:
                return static_cast<void*>(
                        m_host_tensor->ptr<uint8_t>(idx.begin(), idx.end()));
            case 1:
                return static_cast<void*>(
                        m_host_tensor->ptr<short>(idx.begin(), idx.end()));
            case 2:
                return static_cast<void*>(
                        m_host_tensor->ptr<float>(idx.begin(), idx.end()));
            default:
                LITE_THROW("unsupported data type.");
        }
    } else {
        auto elemsize_log = m_dev_tensor->layout().dtype.size_log();
        switch (elemsize_log) {
            case 0:
                return static_cast<void*>(
                        m_dev_tensor->ptr<uint8_t>(idx.begin(), idx.end()));
            case 1:
                return static_cast<void*>(
                        m_dev_tensor->ptr<short>(idx.begin(), idx.end()));
            case 2:
                return static_cast<void*>(
                        m_dev_tensor->ptr<float>(idx.begin(), idx.end()));
            default:
                LITE_THROW("unsupported data type.");
        }
    }
}

std::shared_ptr<Tensor> TensorImplDft::slice(
        const std::vector<size_t>& start, const std::vector<size_t>& end,
        const std::vector<size_t>& step) {
    Layout layout;
    mgb::TensorLayout layout_mge;
    if (is_host()) {
        layout_mge = m_host_tensor->layout();
        layout = to_lite_layout(m_host_tensor->layout());
    } else {
        layout_mge = m_dev_tensor->layout();
        layout = to_lite_layout(m_dev_tensor->layout());
    }

    size_t length = start.size();
    LITE_ASSERT(
            length == end.size() && length <= layout.ndim,
            "start and end must have the same size, which must not exceed the "
            "layout ndim.");
    std::vector<mgb::Slice> slices;
    if (step.size()) {
        LITE_ASSERT(
                length == step.size(), "start and step must have the same size.");
        for (size_t i = 0; i < length; i++) {
            slices.push_back(mgb::Slice{start[i], end[i], step[i]});
        }
    } else {
        for (size_t i = 0; i < length; i++) {
            slices.push_back(mgb::Slice{start[i], end[i]});
        }
    }
    auto subspec = mgb::SubTensorSpec::make_from_offset_elem(layout_mge, 0);
    size_t axis = 0;
    for (auto&& i : slices) {
        subspec.merge_with(i.apply(subspec.layout(), axis));
        axis++;
    }
    auto ret = std::make_shared<Tensor>();
    auto& impl = TensorHelper::implement(ret)->cast_final_safe<TensorImplDft>();
    if (is_host()) {
        *impl.m_host_tensor = m_host_tensor->sub(subspec);
    } else {
        impl.m_dev_tensor =
                std::make_shared<mgb::DeviceTensorND>(m_dev_tensor->sub(subspec));
        impl.m_host_tensor = nullptr;
    }
    LITE_ASSERT(is_host() == impl.is_host());
    return ret;
}

void TensorImplDft::fill_zero() {
    if (is_host()) {
        auto mge_layout = m_host_tensor->layout();
        if (m_host_tensor->layout().is_physical_contiguous()) {
            auto ptr = get_memory_ptr();
            std::memset(ptr, 0, mge_layout.dtype.size(mge_layout.total_nr_elems()));
        } else {
            //! for non-contiguous layouts, zero a contiguous temp tensor and
            //! copy it in through the fixed-layout path
            TensorImplDft tmp(
                    LiteDeviceType::LITE_CPU, to_lite_layout(mge_layout), true);
            tmp.fill_zero();
            this->copy_from(&tmp);
        }
    } else {
        mgb::dev_tensor_memset(*m_dev_tensor, 0);
        m_dev_tensor->sync();
    }
}
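/*!
 * Usage sketch for slice() above (illustrative comment only): start/end form
 * half-open ranges per axis, an empty step vector defaults every axis to a
 * step of 1, and the result shares storage with the source tensor. `width`
 * below is a hypothetical extent of axis 1:
 * \code
 *     // rows 1 and 2 of a 2-D tensor; result shape is {2, width}
 *     auto sub = tensor_impl.slice({1, 0}, {3, width}, {});
 * \endcode
 */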
//! make sure the src memory is ready
    src_tensor_impl->get_memory_ptr();
    if (is_host()) {
        *m_host_tensor = *src_dft_tensor->m_host_tensor;
    } else {
        *m_dev_tensor = *src_dft_tensor->m_dev_tensor;
    }
}

void TensorImplDft::set_layout(const Layout& layout) {
    bool host = is_host();
    auto mgb_layout = to_impl_layout(layout);
    if (host) {
        m_host_tensor->dtype(mgb_layout.dtype);
        m_host_tensor->resize(mgb_layout);
    } else {
        m_dev_tensor->dtype(mgb_layout.dtype);
        m_dev_tensor->resize(mgb_layout);
    }
}

void TensorImplDft::reshape(const Layout& layout) {
    auto mgb_layout = to_impl_layout(layout);
    bool host = is_host();
    if (host) {
        m_host_tensor->resize(mgb_layout);
    } else {
        m_dev_tensor->resize(mgb_layout);
    }
}

void TensorImplDft::reset(void* prepared_data) {
    auto raw_ptr = static_cast<mgb::dt_byte*>(prepared_data);
    //! no-op deleter: the tensor borrows the caller-owned memory
    auto raw_storage = std::shared_ptr<mgb::dt_byte>(raw_ptr, [](void*) {});
    bool host = is_host();
    if (host) {
        auto cn = m_host_tensor->comp_node();
        auto mge_layout = m_host_tensor->layout();
        size_t size = mge_layout.span().dist_byte();
        mgb::HostTensorStorage storage;
        storage.reset(cn, size, raw_storage);
        if (m_record_reset) {
            m_host_tensor->only_reset_raw_storage(storage);
        } else {
            m_host_tensor->reset(storage, mge_layout);
        }
    } else {
        auto cn = m_dev_tensor->comp_node();
        auto mge_layout = m_dev_tensor->layout();
        size_t size = mge_layout.span().dist_byte();
        mgb::DeviceTensorStorage storage;
        storage.reset(cn, size, raw_storage);
        if (m_record_reset) {
            m_dev_tensor->only_reset_raw_storage(storage);
        } else {
            m_dev_tensor->reset(storage, mge_layout);
        }
    }
    if (m_reset_callback) {
        m_reset_callback(this);
    }
}

void TensorImplDft::reset(void* prepared_data, const Layout& layout) {
    set_layout(layout);
    reset(prepared_data);
}

bool TensorImplDft::is_continue_memory() const {
    if (is_host()) {
        return m_host_tensor->layout().is_physical_contiguous();
    } else {
        return m_dev_tensor->layout().is_physical_contiguous();
    }
}

void TensorImplDft::copy_from(const TensorImplBase* src_impl) {
    if (is_continue_memory()) {
        copy_from_continue(src_impl);
    } else {
        copy_from_fixlayout(src_impl);
    }
}

void TensorImplDft::copy_from_continue(const TensorImplBase* src_impl) {
    auto src = static_cast<const TensorImplDft*>(src_impl);
    if (is_host()) {
        //! host to host
        if (src->is_host()) {
            m_host_tensor->copy_from(*src->m_host_tensor);
            //! device to host
        } else {
            auto src_cn = src->m_dev_tensor->comp_node();
            auto dst_cn = m_host_tensor->comp_node();
            if (src_cn != dst_cn && m_host_tensor->layout().ndim > 0) {
                LITE_WARN(
                        "The dst tensor memory is allocated before copying, "
                        "so pinned memory will not be used to optimize the "
                        "copy performance.");
                //! when doing a D2H copy in megbrain with different src and
                //! dst comp nodes, one of them must be cpu-default, so a
                //! temporary host tensor is used for transition
                auto tmp_impl = std::make_shared<TensorImplDft>();
                tmp_impl->set_mge_tensor_compnode(src_cn);
                tmp_impl->m_host_tensor->copy_from(*src->m_dev_tensor).sync();
                m_host_tensor->copy_from(*tmp_impl->m_host_tensor);
            } else {
                //! if the dst comp node is not valid (memory not allocated
                //! yet), the tensor becomes a pinned host tensor
                m_host_tensor->comp_node(src_cn, true);
                m_host_tensor->copy_from(*src->m_dev_tensor).sync();
            }
        }
    } else {
        //! host to device
        if (src->is_host()) {
            m_dev_tensor->copy_from(*src->m_host_tensor).sync();
            //! device to device
        } else {
            m_dev_tensor->copy_from(*src->m_dev_tensor).sync();
        }
    }
}
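/*!
 * copy_from_fixlayout() below mirrors the host/device dispatch of
 * copy_from_continue() above, but copies into the destination's existing
 * (possibly non-contiguous) layout instead of letting the copy re-layout the
 * destination; copy_from() selects between the two via is_continue_memory().
 */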
void TensorImplDft::copy_from_fixlayout(const TensorImplBase* src_impl) {
    auto src = static_cast<const TensorImplDft*>(src_impl);
    if (is_host()) {
        //! host to host
        if (src->is_host()) {
            m_host_tensor->copy_from_fixlayout(*src->m_host_tensor);
            //! device to host
        } else {
            auto src_cn = src->m_dev_tensor->comp_node();
            auto dst_cn = m_host_tensor->comp_node();
            if (src_cn != dst_cn && m_host_tensor->layout().ndim > 0) {
                LITE_WARN(
                        "The dst tensor memory is allocated before copying, "
                        "so pinned memory will not be used to optimize the "
                        "copy performance.");
                //! when doing a D2H copy in megbrain with different src and
                //! dst comp nodes, one of them must be cpu-default, so a
                //! temporary host tensor is used for transition
                auto tmp_impl = std::make_shared<TensorImplDft>();
                tmp_impl->set_mge_tensor_compnode(src_cn);
                tmp_impl->m_host_tensor->copy_from(*src->m_dev_tensor).sync();
                m_host_tensor->copy_from_fixlayout(*tmp_impl->m_host_tensor);
            } else {
                //! if the dst comp node is not valid (memory not allocated
                //! yet), the tensor becomes a pinned host tensor
                m_host_tensor->comp_node(src_cn, true);
                m_host_tensor->copy_from_fixlayout(*src->m_dev_tensor).sync();
            }
        }
    } else {
        //! host to device
        if (src->is_host()) {
            m_dev_tensor->copy_from_fixlayout(*src->m_host_tensor).sync();
            //! device to device
        } else {
            m_dev_tensor->copy_from_fixlayout(*src->m_dev_tensor).sync();
        }
    }
}

void TensorImplDft::copy_from_mge_tensor(const mgb::DeviceTensorND& dv) {
    if (is_host()) {
        auto src_cn = dv.comp_node();
        m_host_tensor->comp_node(src_cn, true);
        m_host_tensor->copy_from(dv);
    } else {
        m_dev_tensor->copy_from(dv);
    }
}

void TensorImplDft::set_reset_callback(const std::function<void(TensorImplDft*)>& cb) {
    m_reset_callback = cb;
}

void TensorImplDft::set_get_memory_callback(
        const std::function<void(TensorImplDft*)>& cb) {
    m_get_memory_callback = cb;
}

void TensorImplDft::device_share_host_memory() {
    if (is_host()) {
        if (!m_dev_tensor) {
            m_dev_tensor = std::make_shared<mgb::DeviceTensorND>(
                    m_host_tensor->comp_node(), m_host_tensor->layout());
        }
        if (m_host_tensor->raw_ptr() != m_dev_tensor->raw_ptr()) {
            auto& host_storage = m_host_tensor->storage();
            mgb::DeviceTensorStorage device_storage;
            device_storage.reset(
                    host_storage.comp_node(), host_storage.size(),
                    host_storage.raw_storage());
            m_dev_tensor->only_reset_raw_storage(device_storage);
        }
    }
}

#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}