/**
 * \file imperative/src/impl/physical_tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/imperative.h"
#include "megbrain/imperative/blob_manager.h"

#include "./event_pool.h"
#include "./async_releaser.h"

#include <mutex>

namespace mgb {
namespace imperative {

namespace {

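// Maintains one lazily-created CompNode::Event per Blob so that readers of a
// blob can record the producing comp node's progress and wait on it later
// (see Tensor::get_or_create_event). Entries are dropped when the blob dies
// or when comp nodes are finalized.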
class CompNodeSyncManager : public CompNodeDepedentObject {
    ThinHashMap<Blob*, std::unique_ptr<CompNode::Event>> m_blob2event;
    std::mutex m_mtx;
public:
#if MGB_CUDA && defined(WIN32)
    //! FIXME: on Windows the CUDA driver shuts down before atexit callbacks
    //! run, even when the callback is registered after the driver is
    //! initialized. As a workaround, let the OS reclaim the resources
    //! temporarily; this may be removed after upgrading the CUDA runtime.
    static bool is_into_atexit;
#endif
    std::shared_ptr<void> on_comp_node_finalize() override {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.clear();
        return {};
    }

    static CompNodeSyncManager& inst() {
        static CompNodeSyncManager sl_inst;
#if MGB_CUDA && defined(WIN32)
        //! FIXME: on Windows the CUDA driver shuts down before atexit callbacks
        //! run, even when the callback is registered after the driver is
        //! initialized. As a workaround, let the OS reclaim the resources
        //! temporarily; this may be removed after upgrading the CUDA runtime.
        if (!is_into_atexit) {
            auto err = atexit([] { is_into_atexit = true; });
            mgb_assert(!err, "failed to register atexit function");
        }
#endif
        return sl_inst;
    }

    CompNode::Event* get_or_create_event(Blob* blob) {
        mgb_assert(!is_finalized());
        MGB_LOCK_GUARD(m_mtx);
        auto&& e = m_blob2event[blob];
        if (!e) {
            e = blob->comp_node().create_event();
        }
        return e.get();
    }

    void remove(Blob* blob) {
        MGB_LOCK_GUARD(m_mtx);
        m_blob2event.erase(blob);
    }
};
#if MGB_CUDA && defined(WIN32)
//! FIXME: on Windows the CUDA driver shuts down before atexit callbacks
//! run, even when the callback is registered after the driver is
//! initialized. As a workaround, let the OS reclaim the resources
//! temporarily; this may be removed after upgrading the CUDA runtime.
bool CompNodeSyncManager::is_into_atexit = false;
#endif

}  // namespace

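// Events are pooled: "deleting" an event through this deleter hands it back
// to the untimed EventPool instead of destroying it.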
void EventDeleter::operator()(CompNode::Event* event) {
    EventPool::without_timer().free(event);
}

namespace {
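    // Monotonically increasing id assigned to every Blob, presumably for
    // debugging and tracing.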
    std::atomic_uint64_t next_blob_id = 0;
}

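// Note: m_size covers the whole underlying allocation (visible size plus the
// sub-storage offset), not just the region exposed by this blob.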
Blob::Blob(const DeviceTensorStorage& s):
    m_comp_node{s.comp_node()}, m_storage{s.raw_storage()},
    m_size{s.size() + s.offset()} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

Blob::Blob(CompNode cn, size_t sz):
    m_comp_node{cn}, m_storage{}, m_size{sz} {
    m_id = next_blob_id++;
    BlobManager::inst()->register_blob(this);
}

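// Besides unregistering from the BlobManager, drop this blob's sync event.
// During Windows CUDA atexit (see the FIXME below) the driver may already be
// gone, so the removal is skipped and the OS reclaims the resources.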
Blob::~Blob() {
    BlobManager::inst()->unregister_blob(this);

#if MGB_CUDA && defined(WIN32)
    //! FIXME: on Windows the CUDA driver shuts down before atexit callbacks
    //! run, even when the callback is registered after the driver is
    //! initialized. As a workaround, let the OS reclaim the resources
    //! temporarily; this may be removed after upgrading the CUDA runtime.
    if (CompNodeSyncManager::is_into_atexit)
        return;
#endif
    CompNodeSyncManager::inst().remove(this);
}

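// Storage is allocated lazily: the first access goes through the BlobManager,
// which may defragment device memory to satisfy the allocation.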
const Blob::RawStorage& Blob::storage() {
    if (!m_storage) {
        BlobManager::inst()->alloc_with_defrag(this, m_size);
    }
    return m_storage;
}

Tensor::Tensor(BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv)
        : m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {
}

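// Build a device tensor from a host value: allocate a blob with the same
// layout, then issue an async copy. The host storage is handed to the
// AsyncReleaser so it outlives the copy even if this Tensor dies first.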
Tensor::Tensor(const HostTensorND &hv)
    : Tensor(hv.layout(), hv.comp_node()) {
    m_value = hv;
    dev_tensor().copy_from_fixlayout(hv);
    // even though hv is saved in m_value, Tensor itself could be
    // released before copy completes
    AsyncReleaser::inst()->add(hv);
}

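// Wrap an existing device tensor; if a matching host value is supplied, it is
// cached in m_value so a later get_value() needs no device readback.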
Tensor::Tensor(const DeviceTensorND &dv, const HostTensorND& hv) {
    if (!hv.empty()) {
        mgb_assert(dv.comp_node() == hv.comp_node());
        mgb_assert(dv.dtype() == hv.dtype());
        mgb_assert(dv.shape().eq_shape(hv.shape()));
        m_value = hv;
    }
    m_layout = dv.layout();
    m_blob = Blob::make(dv.storage());
    m_offset = dv.storage().offset();
}

Tensor::Tensor(const TensorLayout& layout, const CompNode& cn)
    : m_layout{layout}, m_blob{Blob::make(cn, layout.span().dist_byte())},
    m_offset{0} {}

Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout)
    : m_layout{layout}, m_blob{blob}, m_offset{offset} {}

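// Host values are first looked up in the MultiCNConstTensorCache; on a hit
// the cached blob is shared instead of allocating and copying again.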
TensorPtr Tensor::make(const HostTensorND& hv) {
    auto&& blob = MultiCNConstTensorCache::inst().lookup(hv);
    if (blob) {
        return make(std::forward<decltype(blob)>(blob), hv.layout(), hv);
    }
    return std::make_shared<Tensor>(hv);
}

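// Reconstruct a DeviceTensorND view on demand from the blob's raw storage,
// applying this tensor's offset and layout.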
DeviceTensorND Tensor::dev_tensor() {
    mgb_assert(m_blob, "uninitialized tensor.");
    DeviceTensorStorage storage;
    storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage());
    storage = storage.sub(m_offset);
    DeviceTensorND ret;
    ret.reset(storage, m_layout);
    return ret;
}

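// Kick off an async device-to-host copy (if the value has not been fetched
// yet) and record an event marking its completion: get_value() blocks on the
// event, while try_get_value() only succeeds once it has finished.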
void Tensor::fetch_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (m_value.empty()) {
        m_value.copy_from(dev_tensor());
        m_value_ready.reset(EventPool::without_timer().alloc(comp_node()));
        m_value_ready->record();
    }
}

bool Tensor::value_fetched() {
    MGB_LOCK_GUARD(m_mtx);
    return m_value.layout().ndim != 0;
}

const HostTensorND& Tensor::get_value() {
    fetch_value();
    if (m_value_ready) {
        m_value_ready->host_wait();
    }
    return m_value;
}

const HostTensorND* Tensor::try_get_value() {
    MGB_LOCK_GUARD(m_mtx);
    if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) {
        return &m_value;
    }
    return nullptr;
}

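// Materialize the scalar as a 1-element host tensor and go through make(),
// so scalars also benefit from the const tensor cache.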
TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
    HostTensorND hv{cn, value.dtype()};
    hv.resize({1});
    memcpy(hv.raw_ptr(), value.storage(), value.dtype().size(1));
    return make(hv);
}

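// Create a view sharing this tensor's blob. Note the layout is rebuilt as a
// contiguous one from `shape`; the parent's strides are not inherited.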
TensorPtr Tensor::sub(size_t offset, TensorShape shape) {
    TensorLayout layout(shape, m_layout.dtype);
    return Tensor::make(m_blob, offset + m_offset, layout);
}

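// Keep the blob alive (via the AsyncReleaser) until the work already queued
// on `cn` has completed.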
void Tensor::add_release_callback(CompNode cn) {
    AsyncReleaser::inst()->add(m_blob, cn);
}

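// Record an event at the current position of the blob's comp node stream so
// that later readers can synchronize against the producer.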
CompNode::Event* Tensor::get_or_create_event() {
    auto e = CompNodeSyncManager::inst().get_or_create_event(m_blob.get());
    e->record();
    return e;
}

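// Touch each singleton once at startup, presumably to pin down their
// construction order and hence the reverse order of destruction at exit.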
void Tensor::static_initialize() {
    EventPool::with_timer();
    EventPool::without_timer();
    AsyncReleaser::inst();
    CompNodeSyncManager::inst();
    MultiCNConstTensorCache::inst();
}

}  // namespace imperative
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}