/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"
#include "megbrain/imperative/profiler.h"

#include "./async_releaser.h"
#include "./event_pool.h"

#include "./profiler/events.h"

#include <mutex>

namespace mgb {
namespace imperative {

namespace {

class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;
M
Megvii Engine Team 已提交
31

32
public:
33 34 35 36 37 38 39
#if MGB_CUDA && defined(WIN32)
    //! FIXME: windows cuda driver shutdown before call atexit function even
    //! register atexit function after init cuda driver! as a workround
    //! recovery resource by OS temporarily, may need remove this after
    //! upgrade cuda runtime
    static bool is_into_atexit;
#endif
40 41 42 43 44 45
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

46
    static CompNodeSyncManager& inst() {
47
        static CompNodeSyncManager* sl_inst = new CompNodeSyncManager();
48 49 50 51 52 53 54 55 56 57
#if MGB_CUDA && defined(WIN32)
        //! FIXME: windows cuda driver shutdown before call atexit function even
        //! register atexit function after init cuda driver! as a workround
        //! recovery resource by OS temporarily, may need remove this after
        //! upgrade cuda runtime
        if (!is_into_atexit) {
            auto err = atexit([] { is_into_atexit = true; });
            mgb_assert(!err, "failed to register atexit function");
        }
#endif
58
        return *sl_inst;
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
    }

    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};
76 77 78 79 80 81 82
#if MGB_CUDA && defined(WIN32)
//! FIXME: windows cuda driver shutdown before call atexit function even
//! register atexit function after init cuda driver! as a workround
//! recovery resource by OS temporarily, may need remove this after
//! upgrade cuda runtime
bool CompNodeSyncManager::is_into_atexit = false;
#endif

}  // namespace

// Custom deleter for pooled events: instead of destroying the event, hand it
// back to the (timer-less) event pool for reuse.
void EventDeleter::operator()(CompNode::Event* event) {
    auto& pool = EventPool::without_timer();
    pool.free(event);
}

namespace {
// Monotonically increasing, process-wide source of unique Blob ids
// (consumed by the Blob constructors below).
std::atomic_uint64_t next_blob_id = 0;
}  // namespace

// Wrap an existing device storage in a Blob and register it with the
// blob manager.
Blob::Blob(const DeviceTensorStorage& s)
        : m_comp_node{s.comp_node()},
          m_storage{s.raw_storage()},
          // raw_storage() refers to the start of the underlying buffer, so the
          // tracked size must include the offset to cover the whole region.
          m_size{s.size() + s.offset()} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

// Create a Blob of `sz` bytes on `cn` without allocating device memory yet;
// the storage is materialized lazily in Blob::storage().
Blob::Blob(CompNode cn, size_t sz) : m_comp_node{cn}, m_storage{}, m_size{sz} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

// Unregister from the blob manager and drop the sync event (if any) that
// CompNodeSyncManager keeps for this blob.
Blob::~Blob() {
    BlobManager::inst()->unregister_blob(this);

#if MGB_CUDA && defined(WIN32)
    //! FIXME: windows cuda driver shutdown before call atexit function even
    //! register atexit function after init cuda driver! as a workround
    //! recovery resource by OS temporarily, may need remove this after
    //! upgrade cuda runtime
    if (CompNodeSyncManager::is_into_atexit)
        return;
#endif
    CompNodeSyncManager::inst().remove(this);
}

// Return the raw storage, allocating it on first access (lazily) through the
// blob manager, which may defragment to satisfy the request.
const Blob::RawStorage& Blob::storage() {
    if (m_storage) {
        return m_storage;
    }
    BlobManager::inst()->alloc_with_defrag(this, m_size);
    return m_storage;
}

// Assemble a Tensor directly from its parts; `hv` (possibly empty) is kept as
// the cached host value.
Tensor::Tensor(
        BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {}

// Upload a host tensor to a freshly allocated device tensor, recording
// profiler events around the copy.
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
    m_value = hv;
    MGB_RECORD_EVENT(
            profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(),
            dev_tensor().raw_ptr());
    dev_tensor().copy_from_fixlayout(hv);
    // even though hv is saved in m_value, Tensor itself could be
    // released before copy completes
    MGB_RECORD_EVENT(
            profiler::HostToDeviceFinishEvent, hv.layout(), hv.comp_node(),
            hv.raw_ptr(), dev_tensor().raw_ptr());
    // Hand hv to the async releaser so its buffer outlives the async copy.
    AsyncReleaser::inst()->add(hv);
}

// Wrap an existing device tensor; `hv`, when non-empty, supplies an
// already-known host mirror of the same data.
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) {
    if (!hv.empty()) {
        // The host mirror must describe exactly the same tensor as `dv`.
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
    m_layout = dv.layout();
    m_blob = Blob::make(dv.storage());
    m_offset = dv.storage().offset();
}

// Allocate an uninitialized tensor of `layout` on comp node `cn`; the blob is
// sized by the layout's byte span.
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
        : m_layout{layout},
          m_blob{Blob::make(cn, layout.span().dist_byte())},
          m_offset{0} {}

// Alias an existing blob at `offset` with the given layout; no data is copied.
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
        : m_layout{layout}, m_blob{blob}, m_offset{offset} {}

// Build a Tensor from a host value, reusing a cached const-tensor blob when
// the multi-comp-node cache already holds this value.
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
    if (!blob) {
        // Cache miss: fall back to a fresh host-to-device upload.
        return std::make_shared<Tensor>(hv);
    }
    return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
}

// Materialize a DeviceTensorND view that aliases this tensor's blob storage
// at m_offset with this tensor's layout.
DeviceTensorND Tensor::dev_tensor() {
    mgb_assert(m_blob, "uninitialized tensor.");
    DeviceTensorStorage blob_storage;
    blob_storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage());
    DeviceTensorStorage view_storage = blob_storage.sub(m_offset);
    DeviceTensorND result;
    result.reset(view_storage, m_layout);
    return result;
}

// Start a device-to-host copy of the value if one is not already cached.
// Non-blocking: readers wait on m_value_ready (see get_value/try_get_value).
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        m_value.copy_from(dev_tensor());
        // Record an event after the copy so completion can be observed later.
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}

// Non-blocking check whether a host copy of the value has been created.
bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_mtx);
    // m_value stays empty (ndim == 0) until fetch_value() populates it.
    return m_value.layout().ndim != 0;
}

// Blocking read: ensure a host copy exists, then wait until the
// device-to-host copy has finished before returning it.
const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

// Non-blocking read: return the host value only if it exists and the pending
// device-to-host copy (if any) has already finished; otherwise nullptr.
const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) {
        return &m_value;
    }
    return nullptr;
}

// Build a 1-element tensor on `cn` holding the given scalar value.
TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND host{cn, value.dtype()};
    host.resize({1});
    // Copy the raw scalar bytes (one element of the scalar's dtype).
    memcpy(host.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(host);
}

// View `shape` elements of this tensor's dtype starting `offset` bytes past
// this tensor's own offset; the blob is shared, no data is copied.
TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    TensorLayout sub_layout(shape, m_layout.dtype);
    return Tensor::make(m_blob, m_offset + offset, sub_layout);
}

// Keep the underlying blob alive until pending work on `cn` completes, by
// handing a reference to the async releaser.
void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

// Fetch the per-blob sync event and record it at the current stream position
// so callers can later wait for work involving this tensor.
CompNode::Event* Tensor::get_or_create_event() {
    auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    e->record();
    return e;
}

// Eagerly touch the process-wide singletons used by this file; presumably to
// pin their construction order before first real use — TODO confirm.
void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
}

}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}