/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"
#include "megbrain/imperative/profiler.h"
#include "megbrain/imperative/resource_manager.h"

#include "./async_releaser.h"
#include "./event_pool.h"

#include "./profiler/events.h"

#include <mutex>

namespace mgb {
namespace imperative {

namespace {

class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;
M
Megvii Engine Team 已提交
32

33 34 35 36 37 38 39
public:
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

40
    static CompNodeSyncManager& inst() {
41
        static auto* sl_inst = ResourceManager::create_global<CompNodeSyncManager>();
42
        return *sl_inst;
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
    }

    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};

}  // namespace

// Deleter for EventDeleter-managed smart pointers: events are pooled, so the
// event is returned to the (timer-less) pool instead of being destroyed.
void EventDeleter::operator()(CompNode::Event* event) {
    EventPool::without_timer().free(event);
}

namespace {
// Monotonically increasing id handed out to every Blob at construction time.
std::atomic_uint64_t next_blob_id = 0;
}

// Construct a Blob sharing the raw storage of an existing DeviceTensorStorage.
// m_size covers payload plus offset so the full raw-storage extent is kept.
Blob::Blob(const DeviceTensorStorage& s)
        : m_comp_node{s.comp_node()},
          m_storage{s.raw_storage()},
          m_size{s.size() + s.offset()} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

// Construct a lazily-allocated Blob of sz bytes on cn; device memory is only
// obtained on the first call to storage().
Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::~Blob() {
    // Unregister from the blob manager and drop any per-blob sync event held
    // by CompNodeSyncManager.
    BlobManager::inst()->unregister_blob(this);
    CompNodeSyncManager::inst().remove(this);
}

// Return the underlying device storage, allocating it on demand (possibly
// triggering defragmentation) for blobs created without backing memory.
const Blob::RawStorage& Blob::storage() {
    if (!m_storage) {
        BlobManager::inst()->alloc_with_defrag(this, m_size);
    }
    return m_storage;
}

// Construct a tensor view over an existing blob with the given layout and
// byte offset; hv (possibly empty) is cached as the host-side value.
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_cn(blob->comp_node()),
          m_shape(layout),
          m_dtype(layout.dtype),
          m_layout(layout),
          m_blob(std::move(blob)),
          m_offset(offset),
          m_value(hv) {}
// Construct a device tensor from a host tensor: allocate a blob with the same
// layout, then asynchronously copy hv to the device.
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
    // Tiny tensors (up to MAX_NDIM elements, typically shape-like values) also
    // keep the host copy so later get_value() needs no device round-trip.
    // size_t (not int) avoids a signed/unsigned comparison below.
    constexpr size_t size_threshold = TensorShape::MAX_NDIM;
    size_t nr_elems = hv.layout().total_nr_elems();
    if (nr_elems <= size_threshold) {
        m_value = hv;
    }
    if (nr_elems) {
        MGB_RECORD_EVENT(
                profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
                dev_tensor().raw_ptr());
        DeviceTensorStorage storage;
        storage.reset(m_cn, m_blob->size(), m_blob->storage());
        storage = storage.sub(m_offset);
        DeviceTensorND dv;
        dv.reset(storage, m_layout);
        // asynchronous H2D copy
        dv.copy_from_fixlayout(hv);
        // even though hv is saved in m_value, Tensor itself could be
        // released before copy completes
        MGB_RECORD_EVENT(
                profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
                hv.raw_ptr(), dev_tensor().raw_ptr());
        AsyncReleaser::inst()->add(hv);
    }
}

// Wrap an existing device tensor, sharing its storage via a new Blob. If a
// host mirror hv is supplied, it must match dv in comp node, dtype and shape,
// and is cached as the host-side value.
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv)
        : m_offset(dv.storage().offset()),
          m_cn(dv.comp_node()),
          m_shape(dv.layout()),
          m_dtype(dv.layout().dtype),
          m_blob(Blob::make(dv.storage())),
          m_layout(dv.layout()) {
    if (!hv.empty()) {
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
}

// Allocate an uninitialized tensor of the given layout on comp node cn; the
// blob is sized from the layout's byte span.
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0},
          m_cn(cn),
          m_shape(layout),
          m_dtype(layout.dtype) {}
// Construct a tensor view over blob at the given byte offset and layout; no
// host-side value is attached.
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout},
          m_blob{blob},
          m_offset{offset},
          m_cn(blob->comp_node()),
          m_shape(layout),
          m_dtype(layout.dtype) {}
// Create a tensor from a host value, reusing a blob from the multi-comp-node
// const tensor cache when an identical value is already resident.
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
    if (!blob) {
        // cache miss: allocate a fresh device tensor and copy hv over
        return std::make_shared<Tensor>(hv);
    }
    return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
}

// Make this tensor's layout satisfy layout_checker in place: if the current
// (non-empty) layout is rejected, copy the data into a freshly allocated
// contiguous blob and swap blob/layout/offset under m_blob_mtx.
void Tensor::to_contiguous_inplace(VarNode::LayoutConstraintCallback& layout_checker) {
    MGB_LOCK_GUARD(m_blob_mtx);
    if (!m_layout.is_empty() && !layout_checker(m_layout)) {
        DeviceTensorStorage storage;
        storage.reset(m_cn, m_blob->size(), m_blob->storage());
        storage = storage.sub(m_offset);
        DeviceTensorND dv;
        dv.reset(storage, m_layout);

        // copy_from allocates contiguous storage for dv_contig
        DeviceTensorND dv_contig;
        dv_contig.copy_from(dv);
        m_layout = dv_contig.layout();
        // atomic_store on the shared_ptr — presumably paired with atomic
        // loads of m_blob elsewhere in the project; verify against callers
        std::atomic_store(&m_blob, Blob::make(dv_contig.storage()));
        mgb_assert(m_layout.is_contiguous());
        m_offset = 0;
    }
}

// Overload using the default constraint: a fully contiguous layout.
void Tensor::to_contiguous_inplace() {
    static VarNode::LayoutConstraintCallback default_cb =
            [](const TensorLayout& layout) { return layout.is_contiguous(); };
    to_contiguous_inplace(default_cb);
}

// Rebind this tensor to dv's storage: blob, byte offset and layout are all
// taken from dv. Guarded by m_blob_mtx; the blob pointer is swapped with
// atomic_store (presumably for readers that load m_blob without the lock).
void Tensor::assign_from_dev_tensor(DeviceTensorND dv) {
    MGB_LOCK_GUARD(m_blob_mtx);
    std::atomic_store(&m_blob, Blob::make(dv.storage()));
    m_offset = dv.storage().offset();
    m_layout = dv.layout();
}

// Build a DeviceTensorND view over this tensor's blob storage. When
// `contiguous` is true the layout is first made contiguous in place (which
// may replace m_blob with a fresh allocation).
DeviceTensorND Tensor::dev_tensor(bool contiguous) {
    mgb_assert(m_blob, "uninitialized tensor.");
    if (contiguous) {
        // must run before taking m_blob_mtx: it acquires the same mutex
        to_contiguous_inplace();
    }
    MGB_LOCK_GUARD(m_blob_mtx);
    DeviceTensorStorage storage;
    storage.reset(m_cn, m_blob->size(), m_blob->storage());
    storage = storage.sub(m_offset);
    DeviceTensorND ret;
    ret.reset(storage, m_layout);
    return ret;
}

// Copy the device value into the host cache m_value (no-op if already
// cached), recording an event so callers can wait for the async copy.
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_blob_mtx);
    MGB_LOCK_GUARD(m_value_mtx);
    if (m_value.empty()) {
        DeviceTensorStorage storage;
        storage.reset(m_cn, m_blob->size(), m_blob->storage());
        storage = storage.sub(m_offset);
        DeviceTensorND dv;
        dv.reset(storage, m_layout);
        // D2H copy; completion is signalled by the event recorded below
        m_value.copy_from(dv);
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}

// Whether a host copy of the value has been created (the async copy filling
// it may still be in flight; see m_value_ready).
bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_value_mtx);
    return m_value.layout().ndim != 0;
}

// Blocking read of the host value: fetch it if necessary, then wait until the
// device-to-host copy has actually completed.
const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

// Non-blocking read: return the cached host value only when it exists and any
// pending device-to-host copy has finished; otherwise nullptr.
const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_value_mtx);
    if (m_value.empty()) {
        return nullptr;
    }
    bool ready = !m_value_ready || m_value_ready->finished();
    return ready ? &m_value : nullptr;
}

// Build a 1-element tensor on cn holding the given scalar value.
TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND host{cn, value.dtype()};
    host.resize({1});
    // copy exactly one element's worth of bytes from the scalar storage
    memcpy(host.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(host);
}

// Create a sub-tensor sharing this tensor's blob: a contiguous layout of the
// given shape starting (offset + m_offset) bytes into the blob.
TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    return Tensor::make(m_blob, m_offset + offset, TensorLayout{shape, m_dtype});
}

// Keep the blob alive until the work currently queued on cn has completed
// (handled asynchronously by AsyncReleaser).
void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

// Fetch (lazily creating) the per-blob sync event and record the current
// position of the comp node's stream on it.
CompNode::Event* Tensor::get_or_create_event() {
    auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    e->record();
    return e;
}

// Eagerly construct the global singletons used by the tensor machinery so
// their initialization cost is not paid lazily on a hot path.
void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
}

}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}