/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"
#include "megbrain/imperative/profiler.h"
#include "megbrain/imperative/resource_manager.h"

#include "./async_releaser.h"
#include "./event_pool.h"

#include "./profiler/events.h"

#include <mutex>

namespace mgb {
namespace imperative {

namespace {

class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;
M
Megvii Engine Team 已提交
32

33 34 35 36 37 38 39
public:
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

40
    static CompNodeSyncManager& inst() {
41
        static auto* sl_inst = ResourceManager::create_global<CompNodeSyncManager>();
42
        return *sl_inst;
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
    }

    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};

}  // namespace

void EventDeleter::operator()(CompNode::Event* event) {
64
    EventPool::without_timer().free(event);
65 66
}

namespace {
// Monotonically increasing counter handing out a unique id per Blob.
std::atomic_uint64_t next_blob_id{0};
}  // namespace

M
Megvii Engine Team 已提交
71 72 73 74
Blob::Blob(const DeviceTensorStorage& s)
        : m_comp_node{s.comp_node()},
          m_storage{s.raw_storage()},
          m_size{s.size() + s.offset()} {
75
    m_id = next_blob_id++;
76 77 78
    BlobManager::inst()->register_blob(this);
}

M
Megvii Engine Team 已提交
79
Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
80
    m_id = next_blob_id++;
81 82 83 84 85
    BlobManager::inst()->register_blob(this);
}

Blob::~Blob() {
    // Detach from both managers so neither keeps a dangling Blob*.
    BlobManager::inst()->unregister_blob(this);
    CompNodeSyncManager::inst().remove(this);
}

// Return the underlying storage, allocating it on demand (possibly after
// defragmentation) for blobs that were created without storage.
const Blob::RawStorage& Blob::storage() {
    if (m_storage) {
        return m_storage;
    }
    BlobManager::inst()->alloc_with_defrag(this, m_size);
    return m_storage;
}

M
Megvii Engine Team 已提交
96 97 98
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {}
99

M
Megvii Engine Team 已提交
100
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
101
    constexpr int size_threshold = TensorShape::MAX_NDIM;
102 103
    size_t nr_elems = hv.layout().total_nr_elems();
    if (nr_elems <= size_threshold) {
104 105
        m_value = hv;
    }
106 107 108 109 110 111 112 113 114 115 116 117
    if (nr_elems) {
        MGB_RECORD_EVENT(
                profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
                dev_tensor().raw_ptr());
        dev_tensor().copy_from_fixlayout(hv);
        // even though hv is saved in m_value, Tensor itself could be
        // released before copy completes
        MGB_RECORD_EVENT(
                profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
                hv.raw_ptr(), dev_tensor().raw_ptr());
        AsyncReleaser::inst()->add(hv);
    }
118 119
}

M
Megvii Engine Team 已提交
120
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) {
121 122 123 124 125 126 127 128
    if (!hv.empty()) {
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
    m_layout = dv.layout();
    m_blob = Blob::make(dv.storage());
129
    m_offset = dv.storage().offset();
130 131 132
}

// Allocate a fresh blob on `cn` large enough to hold `layout`.
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0} {}

// Alias an existing blob at a byte offset with a new layout; no host value
// is cached.
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout}, m_blob{blob}, m_offset{offset} {}

// Create a tensor from a host value, reusing a cached const blob when the
// multi-comp-node cache already holds an equivalent tensor.
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& cached = MultiCNConstTensorCache::inst().lookup(hv);
    if (cached) {
        return make(std::forward<decltype(cached)>(cached), hv.layout(), hv);
    }
    return std::make_shared<Tensor>(hv);
}

// Materialize a DeviceTensorND view over this tensor's blob (triggers lazy
// blob allocation via Blob::storage() if needed).
DeviceTensorND Tensor::dev_tensor() {
    mgb_assert(m_blob, "uninitialized tensor.");
    DeviceTensorStorage full_storage;
    full_storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage());
    DeviceTensorND view;
    view.reset(full_storage.sub(m_offset), m_layout);
    return view;
}

void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        m_value.copy_from(dev_tensor());
162
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
        m_value_ready->record();
    }
}

// Whether a host copy has been set up; the copy itself may still be in
// flight (see try_get_value).
bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_mtx);
    return m_value.layout().ndim != 0;
}

// Blocking read: make sure the device-to-host copy is issued, then wait
// for it to finish before exposing the host value.
const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

// Non-blocking read: return the host value only if it exists and any
// pending device-to-host copy has finished; nullptr otherwise.
const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        return nullptr;
    }
    if (m_value_ready && !m_value_ready->finished()) {
        return nullptr;
    }
    return &m_value;
}

// Build a single-element tensor holding `value` on comp node `cn`.
TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND host{cn, value.dtype()};
    host.resize({1});
    memcpy(host.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(host);
}

// View a sub-region of this tensor: same blob and dtype, offset shifted by
// `offset`, reinterpreted with `shape`.
TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    TensorLayout sub_layout(shape, m_layout.dtype);
    return Tensor::make(m_blob, m_offset + offset, sub_layout);
}

// Register this tensor's blob with the AsyncReleaser for comp node `cn`,
// deferring the blob's release until the releaser decides it is safe.
void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

// Return the blob's sync event with a fresh record() on it, so callers can
// later wait on work queued up to this point.
CompNode::Event* Tensor::get_or_create_event() {
    auto* ev = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    ev->record();
    return ev;
}

210 211
void Tensor::static_initialize() {
    EventPool::with_timer();
212
    EventPool::without_timer();
213 214 215
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
216 217
}

}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}