FaissExecutionEngine.cpp 6.8 KB
Newer Older
X
Xu Peng 已提交
1 2 3 4 5
/*******************************************************************************
 * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
 * Unauthorized copying of this file, via any medium is strictly prohibited.
 * Proprietary and confidential.
 ******************************************************************************/
6
#include "FaissExecutionEngine.h"
G
groot 已提交
7
#include "Log.h"
S
starlord 已提交
8
#include "utils/CommonUtil.h"
X
Xu Peng 已提交
9

X
Xu Peng 已提交
10
#include <faiss/AutoTune.h>
11 12 13
#include <faiss/MetaIndexes.h>
#include <faiss/IndexFlat.h>
#include <faiss/index_io.h>
X
Xu Peng 已提交
14
#include <wrapper/Index.h>
X
Xu Peng 已提交
15
#include <wrapper/IndexBuilder.h>
X
Xu Peng 已提交
16
#include <cache/CpuCacheMgr.h>
Y
yu yunfeng 已提交
17
#include "faiss/IndexIVF.h"
Y
yu yunfeng 已提交
18
#include "metrics/Metrics.h"
X
Xu Peng 已提交
19 20 21


namespace zilliz {
J
jinhai 已提交
22
namespace milvus {
X
Xu Peng 已提交
23 24
namespace engine {

25 26 27 28 29 30 31
namespace {
std::string GetMetricType() {
    server::ServerConfig &config = server::ServerConfig::GetInstance();
    server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE);
    return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2");
}
}
32

G
groot 已提交
33 34 35 36
// Constructs an engine around a brand-new faiss index.
//
// dimension         vector dimension handed to faiss::index_factory
// location          stored in location_ (used by Serialize/Load/Cache)
// build_index_type  stored in build_index_type_ (used by BuildIndex/Init)
// raw_index_type    faiss factory description string for the new index
FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
    : location_(location),
      build_index_type_(build_index_type),
      raw_index_type_(raw_index_type) {
    // Only the exact string "L2" selects L2 distance; any other configured
    // value results in inner product.
    const bool use_l2 = (GetMetricType() == "L2");
    faiss::Index* created = faiss::index_factory(
        dimension,
        raw_index_type.c_str(),
        use_l2 ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT);
    pIndex_.reset(created);
}

G
groot 已提交
46 47 48 49
// Constructs an engine that wraps an already existing faiss index.
//
// index             index to share; copied into pIndex_
// location          stored in location_
// build_index_type  stored in build_index_type_
// raw_index_type    stored in raw_index_type_
FaissExecutionEngine::FaissExecutionEngine(
        std::shared_ptr<faiss::Index> index,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
    : pIndex_(index)
    , location_(location)
    , build_index_type_(build_index_type)
    , raw_index_type_(raw_index_type)
{
}

G
groot 已提交
56
// Forwards n vectors (xdata) together with their ids (xids) to the wrapped
// index's add_with_ids and reports OK.
Status FaissExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) {
    faiss::Index& target = *pIndex_;
    target.add_with_ids(n, xdata, xids);

    return Status::OK();
}

G
groot 已提交
61
size_t FaissExecutionEngine::Count() const {
X
Xu Peng 已提交
62 63 64
    return (size_t)(pIndex_->ntotal);
}

G
groot 已提交
65
size_t FaissExecutionEngine::Size() const {
66
    return (size_t)(Count() * pIndex_->d)*sizeof(float);
X
Xu Peng 已提交
67 68
}

G
groot 已提交
69 70 71 72
// Returns the dimension (d) of the wrapped index.
size_t FaissExecutionEngine::Dimension() const {
    const size_t dim = pIndex_->d;
    return dim;
}

G
groot 已提交
73
size_t FaissExecutionEngine::PhysicalSize() const {
S
starlord 已提交
74
    return server::CommonUtil::GetFileSize(location_);
75 76
}

G
groot 已提交
77
// Writes the wrapped index to the file at location_ via write_index and
// reports OK.
Status FaissExecutionEngine::Serialize() {
    const char* target_path = location_.c_str();
    write_index(pIndex_.get(), target_path);

    return Status::OK();
}

G
groot 已提交
82
// Makes the index stored at location_ the engine's active index.
//
// The CPU cache is consulted first. On a miss the index is read from disk
// with read_index, inserted into the cache through Cache(), and disk-load
// metrics (duration, size, IO speed) are reported.
//
// Returns Status::OK() on success, or Status::Error when read_index hands
// back an empty handle.
Status FaissExecutionEngine::Load() {
    auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
    const bool loaded_from_disk = !index;
    auto start_time = METRICS_NOW_TIME;
    if (loaded_from_disk) {
        index = read_index(location_);
        if (!index) {
            // Guard against dereferencing an empty handle below.
            return Status::Error("Failed to load index file: ", location_);
        }
        ENGINE_LOG_DEBUG << "Disk io from: " << location_;
    }

    pIndex_ = index->data();
    if (loaded_from_disk) {
        Cache();
        auto end_time = METRICS_NOW_TIME;
        auto total_time = METRICS_MICROSECONDS(start_time, end_time);

        server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time);

        // Same size estimate as Size(): one float per vector component.
        const double total_size =
            static_cast<double>(pIndex_->d) * static_cast<double>(pIndex_->ntotal) * sizeof(float);
        server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(total_size);

        // A measured duration of zero would turn the speed into inf/NaN, so
        // the gauge is only updated for a positive duration.
        if (total_time > 0) {
            server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(total_size / double(total_time));
        }
    }
    return Status::OK();
}

G
groot 已提交
109
// Appends every vector (and its id) from the index stored at `location`
// to this engine's index.
//
// The source index is taken from the CPU cache when present, otherwise it
// is read from disk. It must be a faiss::IndexIDMap wrapping a
// faiss::IndexFlat, because the raw vectors are read from IndexFlat::xb
// and the ids from IndexIDMap::id_map.
//
// Returns Status::Error when `location` equals this engine's own location,
// when the source cannot be obtained, or when it does not have the
// expected IDMap-over-Flat layout; Status::OK() otherwise.
Status FaissExecutionEngine::Merge(const std::string& location) {
    if (location == location_) {
        return Status::Error("Cannot Merge Self");
    }
    ENGINE_LOG_DEBUG << "Merge raw file: " << location << " to: " << location_;

    auto to_merge = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
    if (!to_merge) {
        to_merge = read_index(location);
    }
    if (!to_merge) {
        return Status::Error("Failed to load index to merge: ", location);
    }

    // Keep the shared_ptr alive for as long as the raw pointers are in use.
    auto source_data = to_merge->data();

    // dynamic_cast yields nullptr on a type mismatch; check before use
    // instead of dereferencing blindly.
    auto file_index = dynamic_cast<faiss::IndexIDMap*>(source_data.get());
    if (file_index == nullptr) {
        return Status::Error("Index to merge is not an IndexIDMap: ", location);
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
    if (flat_index == nullptr) {
        return Status::Error("Index to merge does not wrap an IndexFlat: ", location);
    }

    pIndex_->add_with_ids(file_index->ntotal, flat_index->xb.data(), file_index->id_map.data());
    return Status::OK();
}

G
groot 已提交
125 126
// Builds a new index of type build_index_type_ from the vectors held by
// this engine and returns a new engine wrapping it, bound to `location`.
//
// The current index must be a faiss::IndexIDMap wrapping a
// faiss::IndexFlat, because the raw vectors are read from IndexFlat::xb
// and the ids from IndexIDMap::id_map. When it is not, an empty
// ExecutionEnginePtr is returned instead of dereferencing a null pointer.
ExecutionEnginePtr
FaissExecutionEngine::BuildIndex(const std::string& location) {
    ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_;

    auto opd = std::make_shared<Operand>();
    opd->d = pIndex_->d;
    opd->index_type = build_index_type_;
    opd->metric_type = GetMetricType();
    IndexBuilderPtr pBuilder = GetIndexBuilder(opd);

    // dynamic_cast yields nullptr on a type mismatch; check before use.
    // NOTE(review): logged at debug level because that is the only log macro
    // visible in this file; raise the severity if an error-level macro exists.
    auto from_index = dynamic_cast<faiss::IndexIDMap*>(pIndex_.get());
    if (from_index == nullptr) {
        ENGINE_LOG_DEBUG << "Build index failed, source is not an IndexIDMap: " << location_;
        return ExecutionEnginePtr();
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(from_index->index);
    if (flat_index == nullptr) {
        ENGINE_LOG_DEBUG << "Build index failed, source does not wrap an IndexFlat: " << location_;
        return ExecutionEnginePtr();
    }

    auto index = pBuilder->build_all(from_index->ntotal,
            flat_index->xb.data(),
            from_index->id_map.data());

    ExecutionEnginePtr new_ee(new FaissExecutionEngine(index->data(), location, build_index_type_, raw_index_type_));
    return new_ee;
}

G
groot 已提交
145
// Runs a k-nearest-neighbour search for n query vectors.
//
// n          number of query vectors in `data`
// data       query vectors
// k          number of neighbours requested per query
// distances  output buffer passed through to faiss search
// labels     output buffer passed through to faiss search
//
// When the wrapped index is a faiss::IndexIVF its nprobe is set from
// nprobe_ before searching. After the search a queries-per-microsecond
// figure is reported for build_index_type_. Always returns Status::OK().
Status FaissExecutionEngine::Search(long n,
                                    const float *data,
                                    long k,
                                    float *distances,
                                    long *labels) const {
    auto start_time = METRICS_NOW_TIME;

    std::shared_ptr<faiss::IndexIVF> ivf_index = std::dynamic_pointer_cast<faiss::IndexIVF>(pIndex_);
    if (ivf_index) {
        ENGINE_LOG_DEBUG << "Index type: IVFFLAT nProbe: " << nprobe_;
        ivf_index->nprobe = nprobe_;
        ivf_index->search(n, data, k, distances, labels);
    } else {
        pIndex_->search(n, data, k, distances, labels);
    }

    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);

    // A measured duration of zero would make the rate inf (or NaN for n == 0),
    // so the metric is only updated for a positive duration.
    if (total_time > 0) {
        server::Metrics::GetInstance().QueryIndexTypePerSecondSet(build_index_type_,
                                                                  double(n) / double(total_time));
    }
    return Status::OK();
}

G
groot 已提交
167
// Wraps the current index in an Index object and inserts it into the CPU
// cache under location_, using PhysicalSize() as the entry's size.
Status FaissExecutionEngine::Cache() {
    auto wrapped = std::make_shared<Index>(pIndex_);
    cache::DataObjPtr cache_entry = std::make_shared<cache::DataObj>(wrapped, PhysicalSize());

    auto cache_mgr = zilliz::milvus::cache::CpuCacheMgr::GetInstance();
    cache_mgr->InsertItem(location_, cache_entry);

    return Status::OK();
}
X
Xu Peng 已提交
174

Y
yu yunfeng 已提交
175 176
// Validates build_index_type_ and, for the IVF variants, reads nprobe_ and
// nlist_ from the engine section of the server configuration.
//
// Returns Status::OK() for the IVF, IVFSQ8 and IDMAP build types and
// Status::Error for anything else.
Status FaissExecutionEngine::Init() {
    const bool is_ivf_family = (build_index_type_ == BUILD_INDEX_TYPE_IVF) ||
                               (build_index_type_ == BUILD_INDEX_TYPE_IVFSQ8);
    if (is_ivf_family) {
        using namespace zilliz::milvus::server;
        ConfigNode engine_section = ServerConfig::GetInstance().GetConfig(CONFIG_ENGINE);
        // The second argument is the value passed as default to GetInt32Value.
        nprobe_ = engine_section.GetInt32Value(CONFIG_NPROBE, 1000);
        nlist_ = engine_section.GetInt32Value(CONFIG_NLIST, 16384);
        return Status::OK();
    }

    // IDMAP needs no extra configuration.
    if (build_index_type_ == BUILD_INDEX_TYPE_IDMAP) {
        return Status::OK();
    }

    return Status::Error("Wrong index type: ", build_index_type_);
}

195

X
Xu Peng 已提交
196
} // namespace engine
J
jinhai 已提交
197
} // namespace milvus
X
Xu Peng 已提交
198
} // namespace zilliz