FaissExecutionEngine.cpp 6.1 KB
Newer Older
X
Xu Peng 已提交
1 2 3 4 5
/*******************************************************************************
 * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
 * Unauthorized copying of this file, via any medium is strictly prohibited.
 * Proprietary and confidential.
 ******************************************************************************/
6
#include "FaissExecutionEngine.h"
G
groot 已提交
7
#include "Log.h"
X
Xu Peng 已提交
8

X
Xu Peng 已提交
9
#include <faiss/AutoTune.h>
10 11 12
#include <faiss/MetaIndexes.h>
#include <faiss/IndexFlat.h>
#include <faiss/index_io.h>
X
Xu Peng 已提交
13
#include <wrapper/Index.h>
X
Xu Peng 已提交
14
#include <wrapper/IndexBuilder.h>
X
Xu Peng 已提交
15
#include <cache/CpuCacheMgr.h>
Y
yu yunfeng 已提交
16
#include "faiss/IndexIVF.h"
Y
yu yunfeng 已提交
17
#include "metrics/Metrics.h"
X
Xu Peng 已提交
18 19 20


namespace zilliz {
J
jinhai 已提交
21
namespace milvus {
X
Xu Peng 已提交
22 23
namespace engine {

24

G
groot 已提交
25 26 27 28 29 30 31 32
FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
    : pIndex_(faiss::index_factory(dimension, raw_index_type.c_str())),
      location_(location),
      build_index_type_(build_index_type),
      raw_index_type_(raw_index_type) {
X
Xu Peng 已提交
33 34
}

G
groot 已提交
35 36 37 38
FaissExecutionEngine::FaissExecutionEngine(std::shared_ptr<faiss::Index> index,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
X
Xu Peng 已提交
39
    : pIndex_(index),
G
groot 已提交
40 41 42
      location_(location),
      build_index_type_(build_index_type),
      raw_index_type_(raw_index_type) {
X
Xu Peng 已提交
43 44
}

G
groot 已提交
45
/**
 * Forwards the given vectors and ids to add_with_ids() of the wrapped index.
 *
 * @param n     count passed through to the index
 * @param xdata vector data (presumably n * Dimension() floats -- confirm against callers)
 * @param xids  ids matching the vectors (presumably n entries -- confirm against callers)
 * @return Status::OK() in all cases; no error from the index is translated here
 */
Status FaissExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) {
    faiss::Index& target = *pIndex_;
    target.add_with_ids(n, xdata, xids);
    return Status::OK();
}

G
groot 已提交
50
size_t FaissExecutionEngine::Count() const {
X
Xu Peng 已提交
51 52 53
    return (size_t)(pIndex_->ntotal);
}

G
groot 已提交
54
size_t FaissExecutionEngine::Size() const {
55
    return (size_t)(Count() * pIndex_->d)*sizeof(float);
X
Xu Peng 已提交
56 57
}

G
groot 已提交
58 59 60 61
/**
 * @return the wrapped index's `d` field converted to size_t.
 */
size_t FaissExecutionEngine::Dimension() const {
    const auto dim = pIndex_->d;
    return static_cast<size_t>(dim);
}

G
groot 已提交
62
size_t FaissExecutionEngine::PhysicalSize() const {
63
    return (size_t)(Count() * pIndex_->d)*sizeof(float);
64 65
}

G
groot 已提交
66
/**
 * Writes the wrapped index to the file named by location_ via write_index().
 *
 * @return Status::OK() in all cases; failures inside write_index() are not
 *         translated into a Status here.
 */
Status FaissExecutionEngine::Serialize() {
    const char* target_path = location_.c_str();
    write_index(pIndex_.get(), target_path);
    return Status::OK();
}

G
groot 已提交
71
/**
 * Loads the index stored under location_ into this engine.
 *
 * The CPU cache is consulted first. On a cache miss the index is read from
 * disk with read_index(), inserted into the cache via Cache(), and disk-load
 * metrics (duration, size, IO speed) are reported.
 *
 * @return Status::OK() on success; Status::Error if no usable index could be
 *         obtained for location_.
 */
Status FaissExecutionEngine::Load() {
    auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
    const bool loaded_from_disk = !index;
    auto start_time = METRICS_NOW_TIME;
    if (loaded_from_disk) {
        index = read_index(location_);
        ENGINE_LOG_DEBUG << "Disk io from: " << location_;
    }

    // Neither the cache nor the disk read is guaranteed (from what is visible
    // here) to hand back a usable index; refuse to dereference a null one.
    if (!index) {
        return Status::Error("Failed to load index: ", location_);
    }
    auto loaded = index->data();
    if (!loaded) {
        return Status::Error("Failed to load index: ", location_);
    }

    pIndex_ = loaded;
    if (loaded_from_disk) {
        Cache();
        auto end_time = METRICS_NOW_TIME;
        auto total_time = METRICS_MICROSECONDS(start_time, end_time);

        // NOTE(review): the metric name says "Seconds" but the value passed is
        // whatever METRICS_MICROSECONDS yields -- confirm the expected unit.
        server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time);

        // Computed in double so a large d * ntotal product cannot overflow an integer type.
        double total_size = static_cast<double>(pIndex_->d)
                            * static_cast<double>(pIndex_->ntotal)
                            * sizeof(float);
        server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(total_size);

        // Skip the speed gauge when the measured duration is not positive;
        // dividing by zero would publish inf/NaN.
        if (total_time > 0) {
            server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(total_size/double(total_time));
        }
    }
    return Status::OK();
}

G
groot 已提交
98
/**
 * Appends every vector (with its id) of the index stored at `location` to
 * this engine's index.
 *
 * The source index is taken from the CPU cache when present, otherwise read
 * from disk. It must be a faiss::IndexIDMap wrapping a faiss::IndexFlat,
 * because the raw vectors are read from IndexFlat::xb and the ids from
 * IndexIDMap::id_map.
 *
 * @param location location of the index to merge in; must differ from location_
 * @return Status::OK() on success; Status::Error when `location` equals
 *         location_, when the source index cannot be obtained, or when it
 *         does not have the expected IDMap-over-Flat structure.
 */
Status FaissExecutionEngine::Merge(const std::string& location) {
    if (location == location_) {
        return Status::Error("Cannot Merge Self");
    }
    ENGINE_LOG_DEBUG << "Merge index file: " << location << " to: " << location_;

    auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
    if (!to_merge) {
        to_merge = read_index(location);
    }
    if (!to_merge) {
        return Status::Error("Failed to load index to merge: ", location);
    }

    // dynamic_cast yields nullptr on a type mismatch (or a null input); check
    // both casts before touching the internals.
    auto file_index = dynamic_cast<faiss::IndexIDMap*>(to_merge->data().get());
    if (file_index == nullptr) {
        return Status::Error("Index to merge is not an IDMap index: ", location);
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
    if (flat_index == nullptr) {
        return Status::Error("Index to merge does not wrap a flat index: ", location);
    }

    pIndex_->add_with_ids(file_index->ntotal, flat_index->xb.data(), file_index->id_map.data());
    return Status::OK();
}

G
groot 已提交
114 115
/**
 * Builds a new index of type build_index_type_ from the vectors held by this
 * engine and wraps it in a new engine bound to `location`.
 *
 * This engine's index must be a faiss::IndexIDMap wrapping a faiss::IndexFlat,
 * because the raw vectors are read from IndexFlat::xb and the ids from
 * IndexIDMap::id_map.
 *
 * @param location location assigned to the newly created engine
 * @return the new engine, or an empty ExecutionEnginePtr when this engine's
 *         index does not have the expected IDMap-over-Flat structure.
 */
ExecutionEnginePtr
FaissExecutionEngine::BuildIndex(const std::string& location) {
    ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_;

    // dynamic_cast yields nullptr on a type mismatch; check both casts instead
    // of dereferencing blindly.
    auto from_index = dynamic_cast<faiss::IndexIDMap*>(pIndex_.get());
    if (from_index == nullptr) {
        ENGINE_LOG_DEBUG << "Build index failed, source is not an IDMap index: " << location_;
        return ExecutionEnginePtr();
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(from_index->index);
    if (flat_index == nullptr) {
        ENGINE_LOG_DEBUG << "Build index failed, source does not wrap a flat index: " << location_;
        return ExecutionEnginePtr();
    }

    auto opd = std::make_shared<Operand>();
    opd->d = pIndex_->d;
    opd->index_type = build_index_type_;
    IndexBuilderPtr pBuilder = GetIndexBuilder(opd);

    auto index = pBuilder->build_all(from_index->ntotal,
            flat_index->xb.data(),
            from_index->id_map.data());

    ExecutionEnginePtr new_ee(new FaissExecutionEngine(index->data(), location, build_index_type_, raw_index_type_));
    return new_ee;
}

G
groot 已提交
133
/**
 * Runs a search on the wrapped index and reports a throughput metric.
 *
 * When the wrapped index is a faiss::IndexIVF, its nprobe field is set to
 * nprobe_ before searching. All arguments are forwarded unchanged to the
 * index's search().
 *
 * @param n         number of query vectors
 * @param data      query vector data
 * @param k         number of results requested per query
 * @param distances output buffer for distances (presumably n * k floats -- confirm)
 * @param labels    output buffer for result ids (presumably n * k entries -- confirm)
 * @return Status::OK() in all cases.
 */
Status FaissExecutionEngine::Search(long n,
                                    const float *data,
                                    long k,
                                    float *distances,
                                    long *labels) const {
    auto start_time = METRICS_NOW_TIME;

    // Only IVF indexes carry an nprobe field; the search itself goes through
    // the same virtual call for every index type.
    if (auto ivf_index = std::dynamic_pointer_cast<faiss::IndexIVF>(pIndex_)) {
        ENGINE_LOG_DEBUG << "Index type: IVFFLAT nProbe: " << nprobe_;
        ivf_index->nprobe = nprobe_;
    }
    pIndex_->search(n, data, k, distances, labels);

    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time,end_time);

    // Skip the metric when the measured duration is not positive; dividing by
    // zero would publish inf/NaN.
    // NOTE(review): the metric name says "PerSecond" but the divisor is
    // whatever METRICS_MICROSECONDS yields -- confirm the expected unit.
    if (total_time > 0) {
        server::Metrics::GetInstance().QueryIndexTypePerSecondSet(build_index_type_, double(n)/double(total_time));
    }
    return Status::OK();
}

G
groot 已提交
156
/**
 * Inserts the wrapped index into the CPU cache under the key location_.
 *
 * @return Status::OK() in all cases.
 */
Status FaissExecutionEngine::Cache() {
    auto&& cpu_cache = zilliz::milvus::cache::CpuCacheMgr::GetInstance();
    cpu_cache->InsertItem(location_, std::make_shared<Index>(pIndex_));
    return Status::OK();
}
X
Xu Peng 已提交
162

Y
yu yunfeng 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
/**
 * Validates build_index_type_ and loads type-specific settings.
 *
 * - "IDMap": nothing to configure.
 * - "IVF":   nprobe_ is read from the engine section of the server config
 *            (CONFIG_NPROBE; 1000 is passed as the second argument, presumably
 *            the fallback when the key is absent -- confirm).
 * - anything else is rejected.
 *
 * @return Status::OK() for "IVF" and "IDMap"; Status::Error otherwise.
 */
Status FaissExecutionEngine::Init() {
    if (build_index_type_ == "IDMap") {
        return Status::OK();
    }
    if (build_index_type_ != "IVF") {
        return Status::Error("Wrong index type: ", build_index_type_);
    }

    {
        using namespace zilliz::milvus::server;
        ServerConfig &config = ServerConfig::GetInstance();
        ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE);
        nprobe_ = engine_config.GetInt32Value(CONFIG_NPROBE, 1000);
    }
    return Status::OK();
}

181

X
Xu Peng 已提交
182
} // namespace engine
J
jinhai 已提交
183
} // namespace milvus
X
Xu Peng 已提交
184
} // namespace zilliz