// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

Y
yukun 已提交
12
#include "examples/utils/Utils.h"
G
groot 已提交
13 14 15

#include <time.h>
#include <unistd.h>
16

G
groot 已提交
17 18
#include <iostream>
#include <memory>
Y
yukun 已提交
19
#include <random>
G
groot 已提交
20 21 22
#include <utility>
#include <vector>

23 24
#include "examples/utils/TimeRecorder.h"

G
groot 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
namespace milvus_sdk {

// Number of seconds in one hour; used below to build the fixed hour and day offsets applied to timestamps.
constexpr int64_t SECONDS_EACH_HOUR = 3600;

// Prints a separator line to stdout. The expansion already ends with ';', so call sites in this
// file write a bare BLOCK_SPLITER without a trailing semicolon.
#define BLOCK_SPLITER std::cout << "===========================================" << std::endl;

// Returns the current time as "year_month_day_hour_minute_second" (fields are not zero-padded).
// The timestamp is shifted by a fixed 8 hours and then broken down with gmtime_r, so the result
// is UTC+8 regardless of the host's time zone settings.
std::string
Utils::CurrentTime() {
    time_t shifted_now = time(nullptr);
    shifted_now = shifted_now + 8 * SECONDS_EACH_HOUR;

    tm fields;
    gmtime_r(&shifted_now, &fields);

    // tm_year counts from 1900 and tm_mon is zero-based, hence the adjustments.
    std::string stamp = std::to_string(fields.tm_year + 1900);
    stamp += "_";
    stamp += std::to_string(fields.tm_mon + 1);
    stamp += "_";
    stamp += std::to_string(fields.tm_mday);
    stamp += "_";
    stamp += std::to_string(fields.tm_hour);
    stamp += "_";
    stamp += std::to_string(fields.tm_min);
    stamp += "_";
    stamp += std::to_string(fields.tm_sec);
    return stamp;
}

// Returns the date `offset_day` days away from today as "year-month-day" (fields are not
// zero-padded). A negative offset yields a past date. As in CurrentTime(), the timestamp is
// shifted by a fixed 8 hours and broken down with gmtime_r, i.e. the date is computed in UTC+8.
std::string
Utils::CurrentTmDate(int64_t offset_day) {
    time_t shifted = time(nullptr);
    shifted = shifted + 8 * SECONDS_EACH_HOUR;
    shifted = shifted + 24 * SECONDS_EACH_HOUR * offset_day;

    tm fields;
    gmtime_r(&shifted, &fields);

    // tm_year counts from 1900 and tm_mon is zero-based, hence the adjustments.
    std::string date = std::to_string(fields.tm_year + 1900);
    date += "-";
    date += std::to_string(fields.tm_mon + 1);
    date += "-";
    date += std::to_string(fields.tm_mday);
    return date;
}

// Announces the wait on stdout, then blocks the calling thread for `seconds` seconds.
// Zero or negative values only print the message and return immediately.
void
Utils::Sleep(int seconds) {
    std::cout << "Waiting " << seconds << " seconds ..." << std::endl;

    // sleep() takes an unsigned int: passing a negative value would wrap around to an
    // enormous duration, so only call it for a positive number of seconds.
    if (seconds > 0) {
        sleep(static_cast<unsigned int>(seconds));
    }
}

const std::string&
G
groot 已提交
68 69
Utils::GenCollectionName() {
    static std::string s_id("C_" + CurrentTime());
G
groot 已提交
70 71 72 73 74 75
    return s_id;
}

// Maps a metric type to a human-readable label.
// Values not listed below yield "Unknown metric type".
std::string
Utils::MetricTypeName(const milvus::MetricType& metric_type) {
    const char* label = "Unknown metric type";
    switch (metric_type) {
        case milvus::MetricType::L2:
            label = "L2 distance";
            break;
        case milvus::MetricType::IP:
            label = "Inner product";
            break;
        case milvus::MetricType::HAMMING:
            label = "Hamming distance";
            break;
        case milvus::MetricType::JACCARD:
            label = "Jaccard distance";
            break;
        case milvus::MetricType::TANIMOTO:
            label = "Tanimoto distance";
            break;
        case milvus::MetricType::SUBSTRUCTURE:
            label = "Substructure distance";
            break;
        case milvus::MetricType::SUPERSTRUCTURE:
            label = "Superstructure distance";
            break;
        default:
            break;
    }
    return label;
}

// Maps an index type to its display name.
// Note that IndexType::RNSG is reported as "NSG"; values not listed below yield "Unknown index type".
std::string
Utils::IndexTypeName(const milvus::IndexType& index_type) {
    const char* label = "Unknown index type";
    switch (index_type) {
        case milvus::IndexType::FLAT:
            label = "FLAT";
            break;
        case milvus::IndexType::IVFFLAT:
            label = "IVFFLAT";
            break;
        case milvus::IndexType::IVFSQ8:
            label = "IVFSQ8";
            break;
        case milvus::IndexType::RNSG:
            label = "NSG";
            break;
        case milvus::IndexType::IVFSQ8H:
            label = "IVFSQ8H";
            break;
        case milvus::IndexType::IVFPQ:
            label = "IVFPQ";
            break;
        case milvus::IndexType::SPTAGKDT:
            label = "SPTAGKDT";
            break;
        case milvus::IndexType::SPTAGBKT:
            label = "SPTAGBKT";
            break;
        case milvus::IndexType::HNSW:
            label = "HNSW";
            break;
        case milvus::IndexType::ANNOY:
            label = "ANNOY";
            break;
        default:
            break;
    }
    return label;
}

void
G
groot 已提交
105
Utils::PrintCollectionParam(const milvus::CollectionParam& collection_param) {
G
groot 已提交
106
    BLOCK_SPLITER
G
groot 已提交
107 108 109 110
    std::cout << "Collection name: " << collection_param.collection_name << std::endl;
    std::cout << "Collection dimension: " << collection_param.dimension << std::endl;
    std::cout << "Collection index file size: " << collection_param.index_file_size << std::endl;
    std::cout << "Collection metric type: " << MetricTypeName(collection_param.metric_type) << std::endl;
G
groot 已提交
111 112 113 114 115 116
    BLOCK_SPLITER
}

// Writes the partition's collection name and partition tag to stdout, framed by separator lines.
void
Utils::PrintPartitionParam(const milvus::PartitionParam& partition_param) {
    BLOCK_SPLITER
    std::cout << "Collection name: " << partition_param.collection_name << std::endl
              << "Partition tag: " << partition_param.partition_tag << std::endl;
    BLOCK_SPLITER
}

// Writes the index's collection name, index type name and extra parameters to stdout,
// framed by separator lines.
void
Utils::PrintIndexParam(const milvus::IndexParam& index_param) {
    BLOCK_SPLITER
    std::cout << "Index collection name: " << index_param.collection_name << std::endl
              << "Index type: " << IndexTypeName(index_param.index_type) << std::endl
              << "Index extra_params: " << index_param.extra_params << std::endl;
    BLOCK_SPLITER
}

void
G
groot 已提交
132 133
Utils::BuildEntities(int64_t from, int64_t to, std::vector<milvus::Entity>& entity_array,
                     std::vector<int64_t>& entity_ids, int64_t dimension) {
G
groot 已提交
134 135 136 137
    if (to <= from) {
        return;
    }

G
groot 已提交
138 139
    entity_array.clear();
    entity_ids.clear();
Y
yukun 已提交
140 141
    std::default_random_engine e;
    std::uniform_real_distribution<float> u(0, 1);
G
groot 已提交
142
    for (int64_t k = from; k < to; k++) {
G
groot 已提交
143 144
        milvus::Entity entity;
        entity.float_data.resize(dimension);
G
groot 已提交
145
        for (int64_t i = 0; i < dimension; i++) {
Y
yukun 已提交
146
            entity.float_data[i] = (u(e));
G
groot 已提交
147 148
        }

G
groot 已提交
149 150
        entity_array.emplace_back(entity);
        entity_ids.push_back(k);
G
groot 已提交
151 152 153 154
    }
}

void
G
groot 已提交
155
Utils::PrintSearchResult(const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
156
                         const milvus::TopKQueryResult& topk_query_result) {
G
groot 已提交
157
    BLOCK_SPLITER
158
    std::cout << "Returned result count: " << topk_query_result.size() << std::endl;
G
groot 已提交
159

G
groot 已提交
160
    if (topk_query_result.size() != entity_array.size()) {
161
        std::cout << "ERROR: Returned result count not equal nq" << std::endl;
162 163 164 165 166 167
        return;
    }

    for (size_t i = 0; i < topk_query_result.size(); i++) {
        const milvus::QueryResult& one_result = topk_query_result[i];
        size_t topk = one_result.ids.size();
G
groot 已提交
168 169
        auto search_id = entity_array[i].first;
        std::cout << "No." << i << " entity " << search_id << " top " << topk << " search result:" << std::endl;
G
groot 已提交
170
        for (size_t j = 0; j < topk; j++) {
171
            std::cout << "\t" << one_result.ids[j] << "\t" << one_result.distances[j] << std::endl;
G
groot 已提交
172 173 174 175 176 177
        }
    }
    BLOCK_SPLITER
}

void
G
groot 已提交
178
Utils::CheckSearchResult(const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
179
                         const milvus::TopKQueryResult& topk_query_result) {
G
groot 已提交
180
    BLOCK_SPLITER
181
    size_t nq = topk_query_result.size();
G
groot 已提交
182
    for (size_t i = 0; i < nq; i++) {
183
        const milvus::QueryResult& one_result = topk_query_result[i];
G
groot 已提交
184
        auto search_id = entity_array[i].first;
G
groot 已提交
185 186 187 188 189 190 191 192 193 194 195

        uint64_t match_index = one_result.ids.size();
        for (uint64_t index = 0; index < one_result.ids.size(); index++) {
            if (search_id == one_result.ids[index]) {
                match_index = index;
                break;
            }
        }

        if (match_index >= one_result.ids.size()) {
            std::cout << "The topk result is wrong: not return search target in result set" << std::endl;
G
groot 已提交
196
        } else {
G
groot 已提交
197 198
            std::cout << "No." << i << " Check result successfully for target: " << search_id << " at top "
                      << match_index << std::endl;
G
groot 已提交
199 200 201 202 203 204
        }
    }
    BLOCK_SPLITER
}

void
G
groot 已提交
205
Utils::DoSearch(std::shared_ptr<milvus::Connection> conn, const std::string& collection_name,
G
groot 已提交
206
                const std::vector<std::string>& partition_tags, int64_t top_k, int64_t nprobe,
G
groot 已提交
207
                const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
208
                milvus::TopKQueryResult& topk_query_result) {
209
    topk_query_result.clear();
G
groot 已提交
210

G
groot 已提交
211 212 213
    std::vector<milvus::Entity> temp_entity_array;
    for (auto& pair : entity_array) {
        temp_entity_array.push_back(pair.second);
G
groot 已提交
214 215 216 217
    }

    {
        BLOCK_SPLITER
218
        JSON json_params = {{"nprobe", nprobe}};
219
        milvus_sdk::TimeRecorder rc("Search");
G
groot 已提交
220
        milvus::Status stat =
G
groot 已提交
221 222 223 224 225 226 227
            conn->Search(collection_name,
                         partition_tags,
                         temp_entity_array,
                         top_k,
                         json_params.dump(),
                         topk_query_result);
        std::cout << "Search function call status: " << stat.message() << std::endl;
G
groot 已提交
228 229 230
        BLOCK_SPLITER
    }

G
groot 已提交
231 232
    PrintSearchResult(entity_array, topk_query_result);
    CheckSearchResult(entity_array, topk_query_result);
G
groot 已提交
233 234
}

235 236
void ConstructVector(uint64_t nq, uint64_t dimension, std::vector<milvus::Entity>& query_vector) {
    query_vector.resize(nq);
Y
yukun 已提交
237 238
    std::default_random_engine e;
    std::uniform_real_distribution<float> u(0, 1);
239 240 241
    for (uint64_t i = 0; i < nq; ++i) {
        query_vector[i].float_data.resize(dimension);
        for (uint64_t j = 0; j < dimension; ++j) {
Y
yukun 已提交
242
            query_vector[i].float_data[j] = u(e);
243 244 245 246 247 248 249
        }
    }
}

std::vector<milvus::LeafQueryPtr>
Utils::GenLeafQuery() {
    //Construct TermQuery
Y
yukun 已提交
250
    uint64_t row_num = 10000;
251 252 253 254
    std::vector<int64_t> field_value;
    field_value.resize(row_num);
    for (uint64_t i = 0; i < row_num; ++i) {
        field_value[i] = i;
255 256 257
    }
    milvus::TermQueryPtr tq = std::make_shared<milvus::TermQuery>();
    tq->field_name = "field_1";
Y
yukun 已提交
258
    tq->int_value = field_value;
259 260

    //Construct RangeQuery
Y
yukun 已提交
261
    milvus::CompareExpr ce1 = {milvus::CompareOperator::LTE, "100000"}, ce2 = {milvus::CompareOperator::GTE, "1"};
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
    std::vector<milvus::CompareExpr> ces{ce1, ce2};
    milvus::RangeQueryPtr rq = std::make_shared<milvus::RangeQuery>();
    rq->field_name = "field_2";
    rq->compare_expr = ces;

    //Construct VectorQuery
    uint64_t NQ = 10;
    uint64_t DIMENSION = 128;
    uint64_t NPROBE = 32;
    milvus::VectorQueryPtr vq = std::make_shared<milvus::VectorQuery>();
    ConstructVector(NQ, DIMENSION, vq->query_vector);
    vq->field_name = "field_3";
    vq->topk = 10;
    JSON json_params = {{"nprobe", NPROBE}};
    vq->extra_params = json_params.dump();


    std::vector<milvus::LeafQueryPtr> lq;
    milvus::LeafQueryPtr lq1 = std::make_shared<milvus::LeafQuery>();
    milvus::LeafQueryPtr lq2 = std::make_shared<milvus::LeafQuery>();
    milvus::LeafQueryPtr lq3 = std::make_shared<milvus::LeafQuery>();
    lq.emplace_back(lq1);
    lq.emplace_back(lq2);
    lq.emplace_back(lq3);
    lq1->term_query_ptr = tq;
    lq2->range_query_ptr = rq;
    lq3->vector_query_ptr = vq;

    lq1->query_boost = 1.0;
    lq2->query_boost = 2.0;
    lq3->query_boost = 3.0;
    return lq;
}

G
groot 已提交
296
}  // namespace milvus_sdk