FaissExecutionEngine.cpp 4.7 KB
Newer Older
X
Xu Peng 已提交
1 2 3 4 5
/*******************************************************************************
 * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
 * Unauthorized copying of this file, via any medium is strictly prohibited.
 * Proprietary and confidential.
 ******************************************************************************/
6
#include "FaissExecutionEngine.h"
X
Xu Peng 已提交
7

X
Xu Peng 已提交
8 9
#include <easylogging++.h>
#include <faiss/AutoTune.h>
10 11 12
#include <faiss/MetaIndexes.h>
#include <faiss/IndexFlat.h>
#include <faiss/index_io.h>
X
Xu Peng 已提交
13
#include <wrapper/Index.h>
X
Xu Peng 已提交
14
#include <wrapper/IndexBuilder.h>
X
Xu Peng 已提交
15
#include <cache/CpuCacheMgr.h>
Y
yu yunfeng 已提交
16
#include "metrics/Metrics.h"
X
Xu Peng 已提交
17 18 19 20 21 22


namespace zilliz {
namespace vecwise {
namespace engine {

23

G
groot 已提交
24 25 26 27 28 29 30 31
/**
 * Creates an engine that owns a brand-new faiss index.
 *
 * The index is produced by faiss::index_factory from the given dimension
 * and the raw_index_type description string.
 *
 * @param dimension         vector dimension passed to faiss::index_factory
 * @param location          kept in location_; Serialize(), Load() and Cache()
 *                          use it as the file path / cache key
 * @param build_index_type  kept in build_index_type_; BuildIndex() copies it
 *                          into the builder operand
 * @param raw_index_type    factory description of the index created here;
 *                          also kept in raw_index_type_
 */
FaissExecutionEngine::FaissExecutionEngine(
        uint16_t dimension,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
    : pIndex_(faiss::index_factory(dimension, raw_index_type.c_str())),
      location_(location),
      build_index_type_(build_index_type),
      raw_index_type_(raw_index_type) {
    // All state is established by the member initializers above.
}

G
groot 已提交
34 35 36 37
/**
 * Creates an engine around an already existing faiss index.
 *
 * @param index             index to wrap; stored in pIndex_ (shared ownership)
 * @param location          kept in location_; Serialize(), Load() and Cache()
 *                          use it as the file path / cache key
 * @param build_index_type  kept in build_index_type_; BuildIndex() copies it
 *                          into the builder operand
 * @param raw_index_type    kept in raw_index_type_
 */
FaissExecutionEngine::FaissExecutionEngine(
        std::shared_ptr<faiss::Index> index,
        const std::string& location,
        const std::string& build_index_type,
        const std::string& raw_index_type)
    : pIndex_(index),
      location_(location),
      build_index_type_(build_index_type),
      raw_index_type_(raw_index_type) {
    // All state is established by the member initializers above.
}

G
groot 已提交
44
/**
 * Forwards a batch of vectors and their ids to the wrapped index.
 *
 * @param n      number of vectors in the batch
 * @param xdata  vector data (presumably n * d contiguous floats — confirm
 *               against the faiss add_with_ids contract)
 * @param xids   one id per vector
 * @return always Status::OK(); failures inside faiss are not translated here
 */
Status FaissExecutionEngine::AddWithIds(long n, const float *xdata, const long *xids) {
    auto& target_index = *pIndex_;
    target_index.add_with_ids(n, xdata, xids);

    return Status::OK();
}

G
groot 已提交
49
size_t FaissExecutionEngine::Count() const {
X
Xu Peng 已提交
50 51 52
    return (size_t)(pIndex_->ntotal);
}

G
groot 已提交
53
size_t FaissExecutionEngine::Size() const {
54
    return (size_t)(Count() * pIndex_->d)*sizeof(float);
X
Xu Peng 已提交
55 56
}

G
groot 已提交
57
size_t FaissExecutionEngine::PhysicalSize() const {
58
    return (size_t)(Count() * pIndex_->d)*sizeof(float);
59 60
}

G
groot 已提交
61
/**
 * Writes the wrapped index to the path held in location_ via write_index.
 *
 * @return always Status::OK(); errors raised by write_index are not
 *         caught or translated here
 */
Status FaissExecutionEngine::Serialize() {
    auto* raw_index = pIndex_.get();
    const char* target_path = location_.c_str();
    write_index(raw_index, target_path);

    return Status::OK();
}

G
groot 已提交
66
/**
 * Makes the index stored at location_ the engine's current index.
 *
 * The CPU cache is consulted first. On a miss the index is read from disk
 * with read_index(), inserted into the cache through Cache(), and the load
 * duration, payload size and IO speed are reported to the metrics singleton.
 *
 * @return Status::OK() on success; Status::Error(...) when no usable index
 *         could be obtained (pIndex_ is left untouched in that case)
 */
Status FaissExecutionEngine::Load() {
    auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
    const bool loaded_from_disk = !index;
    auto start_time = METRICS_NOW_TIME;
    if (loaded_from_disk) {
        index = read_index(location_);
        LOG(DEBUG) << "Disk io from: " << location_;
    }

    // Guard against a failed read or an empty wrapper: dereferencing either
    // one below would crash instead of reporting an error to the caller.
    if (!index || !index->data()) {
        LOG(ERROR) << "Failed to load index from: " << location_;
        return Status::Error("Failed to load index");
    }

    pIndex_ = index->data();
    if (loaded_from_disk) {
        Cache();
        auto end_time = METRICS_NOW_TIME;
        // NOTE(review): the elapsed time comes from METRICS_MICROSECONDS but is
        // fed to a "...DurationSeconds..." histogram — confirm the expected unit.
        auto total_time = METRICS_MICROSECONDS(start_time, end_time);

        server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time);

        // Size of the raw float payload; computed in double so that a very
        // large d * ntotal product cannot overflow an integer type.
        const double total_size = static_cast<double>(pIndex_->d)
                                  * static_cast<double>(pIndex_->ntotal)
                                  * static_cast<double>(sizeof(float));

        server::Metrics::GetInstance().FaissDiskLoadSizeBytesHistogramObserve(total_size);
//        server::Metrics::GetInstance().FaissDiskLoadIOSpeedHistogramObserve(total_size/double(total_time));

        // A zero elapsed time would turn the speed into inf/NaN; skip the
        // gauge update rather than publish a meaningless value.
        const double elapsed = double(total_time);
        if (elapsed > 0.0) {
            server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(total_size / elapsed);
        }
    }
    return Status::OK();
}

G
groot 已提交
93
/**
 * Appends every vector (with its id) of the index stored at `location`
 * to this engine's index.
 *
 * The source index is taken from the CPU cache when present, otherwise it
 * is read from disk. It must be a faiss::IndexIDMap wrapping a
 * faiss::IndexFlat, because the raw vectors are copied straight out of the
 * flat storage (xb) together with the id_map.
 *
 * @param location  location of the index to merge in; must differ from location_
 * @return Status::OK() on success, Status::Error(...) when merging with
 *         itself, when the source cannot be loaded, when it does not have
 *         the expected IDMap/Flat layout, or when the dimensions differ
 */
Status FaissExecutionEngine::Merge(const std::string& location) {
    if (location == location_) {
        return Status::Error("Cannot Merge Self");
    }

    auto to_merge = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(location);
    if (!to_merge) {
        to_merge = read_index(location);
    }
    if (!to_merge) {
        return Status::Error("Cannot load index to merge");
    }

    // dynamic_cast yields nullptr on a type mismatch (or a null source);
    // check each step instead of dereferencing blindly.
    auto file_index = dynamic_cast<faiss::IndexIDMap*>(to_merge->data().get());
    if (file_index == nullptr) {
        return Status::Error("Index to merge is not an IndexIDMap");
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(file_index->index);
    if (flat_index == nullptr) {
        return Status::Error("Index to merge does not wrap an IndexFlat");
    }

    // The raw buffer is interpreted using the target's dimension, so a
    // mismatch would read the source vectors with the wrong stride.
    if (file_index->d != pIndex_->d) {
        return Status::Error("Index to merge has a different dimension");
    }

    pIndex_->add_with_ids(file_index->ntotal, flat_index->xb.data(), file_index->id_map.data());
    return Status::OK();
}

G
groot 已提交
107 108
/**
 * Builds a new index of type build_index_type_ from the vectors currently
 * held by this engine and returns it wrapped in a new engine bound to
 * `location`. The new engine is serialized before being returned.
 *
 * The current index must be a faiss::IndexIDMap wrapping a
 * faiss::IndexFlat, because the builder is fed the flat storage (xb) and
 * the id_map directly.
 *
 * @param location  location assigned to the newly built engine
 * @return the new engine, or an empty ExecutionEnginePtr when the current
 *         index does not have the expected IDMap/Flat layout or the builder
 *         produced no index
 */
ExecutionEnginePtr
FaissExecutionEngine::BuildIndex(const std::string& location) {
    // Validate the source layout first: dynamic_cast yields nullptr on a
    // mismatch and must not be dereferenced.
    auto from_index = dynamic_cast<faiss::IndexIDMap*>(pIndex_.get());
    if (from_index == nullptr) {
        LOG(ERROR) << "BuildIndex: source index is not an IndexIDMap: " << location_;
        return ExecutionEnginePtr();
    }
    auto flat_index = dynamic_cast<faiss::IndexFlat*>(from_index->index);
    if (flat_index == nullptr) {
        LOG(ERROR) << "BuildIndex: source index does not wrap an IndexFlat: " << location_;
        return ExecutionEnginePtr();
    }

    auto opd = std::make_shared<Operand>();
    opd->d = pIndex_->d;
    opd->index_type = build_index_type_;
    IndexBuilderPtr pBuilder = GetIndexBuilder(opd);

    auto index = pBuilder->build_all(from_index->ntotal,
            flat_index->xb.data(),
            from_index->id_map.data());
    if (!index) {
        LOG(ERROR) << "BuildIndex: builder returned no index for: " << location;
        return ExecutionEnginePtr();
    }

    ExecutionEnginePtr new_ee(new FaissExecutionEngine(index->data(), location, build_index_type_, raw_index_type_));
    new_ee->Serialize();
    return new_ee;
}

G
groot 已提交
125
/**
 * Runs a search on the wrapped index by forwarding all arguments to its
 * search() method.
 *
 * @param n          number of query vectors
 * @param data       query vector data
 * @param k          number of results requested per query
 * @param distances  output buffer filled by the index
 * @param labels     output buffer filled by the index
 * @return always Status::OK()
 */
Status FaissExecutionEngine::Search(long n,
                                    const float *data,
                                    long k,
                                    float *distances,
                                    long *labels) const {
    auto& searched_index = *pIndex_;
    searched_index.search(n, data, k, distances, labels);

    return Status::OK();
}

G
groot 已提交
135
/**
 * Inserts the current index into the CPU cache under the key location_.
 *
 * @return always Status::OK()
 */
Status FaissExecutionEngine::Cache() {
    auto cache_mgr = zilliz::vecwise::cache::CpuCacheMgr::GetInstance();
    cache_mgr->InsertItem(location_, std::make_shared<Index>(pIndex_));

    return Status::OK();
}
X
Xu Peng 已提交
141

142

X
Xu Peng 已提交
143 144 145
} // namespace engine
} // namespace vecwise
} // namespace zilliz