// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

Y
yukun 已提交
12
#include "examples/utils/Utils.h"

#include <time.h>
#include <unistd.h>

#include <chrono>
#include <iostream>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

#include "examples/utils/TimeRecorder.h"

G
groot 已提交
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
namespace milvus_sdk {

// Number of seconds in one hour; the time helpers below use it to shift epoch timestamps.
constexpr int64_t SECONDS_EACH_HOUR = 3600;

// Writes a line of '=' characters to stdout (flushed via std::endl) as a visual separator.
// The expansion already ends with a semicolon, so call sites use the macro without one.
#define BLOCK_SPLITER std::cout << "===========================================" << std::endl;

// Returns the current time as "<year>_<month>_<day>_<hour>_<minute>_<second>".
// Eight hours are added to the epoch time before it is broken down with gmtime_r,
// so the fields are UTC+8 wall-clock values. The fields are not zero-padded.
std::string
Utils::CurrentTime() {
    time_t now = time(nullptr);
    now += 8 * SECONDS_EACH_HOUR;

    tm fields;
    gmtime_r(&now, &fields);

    const int parts[] = {fields.tm_year + 1900, fields.tm_mon + 1, fields.tm_mday,
                         fields.tm_hour,        fields.tm_min,     fields.tm_sec};

    std::string stamp;
    for (int part : parts) {
        // Every part renders to at least one digit, so an empty string means "first part".
        if (!stamp.empty()) {
            stamp += "_";
        }
        stamp += std::to_string(part);
    }

    return stamp;
}

// Returns a date string "<year>-<month>-<day>" for today plus `offset_day` days.
// Eight hours are added to the epoch time before it is broken down with gmtime_r,
// so the date is the UTC+8 calendar date. The fields are not zero-padded.
std::string
Utils::CurrentTmDate(int64_t offset_day) {
    time_t when = time(nullptr);
    when += 8 * SECONDS_EACH_HOUR;
    when += 24 * SECONDS_EACH_HOUR * offset_day;

    tm fields;
    gmtime_r(&when, &fields);

    const int parts[] = {fields.tm_year + 1900, fields.tm_mon + 1, fields.tm_mday};

    std::string date;
    for (int part : parts) {
        // Every part renders to at least one digit, so an empty string means "first part".
        if (!date.empty()) {
            date += "-";
        }
        date += std::to_string(part);
    }

    return date;
}

// Announces the wait on stdout, then blocks the calling thread for `seconds` seconds.
// A zero or negative value returns right after the message. The previous call to
// sleep(unsigned) converted a negative int into a huge unsigned duration.
void
Utils::Sleep(int seconds) {
    std::cout << "Waiting " << seconds << " seconds ..." << std::endl;
    if (seconds <= 0) {
        return;
    }
    std::this_thread::sleep_for(std::chrono::seconds(seconds));
}

const std::string&
G
groot 已提交
67 68
Utils::GenCollectionName() {
    static std::string s_id("C_" + CurrentTime());
G
groot 已提交
69 70 71 72 73 74
    return s_id;
}

// Maps a metric type to a human-readable label.
// Values without a dedicated case yield "Unknown metric type".
std::string
Utils::MetricTypeName(const milvus::MetricType& metric_type) {
    const char* label = "Unknown metric type";
    switch (metric_type) {
        case milvus::MetricType::L2:
            label = "L2 distance";
            break;
        case milvus::MetricType::IP:
            label = "Inner product";
            break;
        case milvus::MetricType::HAMMING:
            label = "Hamming distance";
            break;
        case milvus::MetricType::JACCARD:
            label = "Jaccard distance";
            break;
        case milvus::MetricType::TANIMOTO:
            label = "Tanimoto distance";
            break;
        case milvus::MetricType::SUBSTRUCTURE:
            label = "Substructure distance";
            break;
        case milvus::MetricType::SUPERSTRUCTURE:
            label = "Superstructure distance";
            break;
        default:
            break;
    }
    return label;
}

// Maps an index type to a human-readable label.
// Note that IndexType::RNSG is reported as "NSG". Values without a dedicated
// case yield "Unknown index type".
std::string
Utils::IndexTypeName(const milvus::IndexType& index_type) {
    const char* label = "Unknown index type";
    switch (index_type) {
        case milvus::IndexType::FLAT:
            label = "FLAT";
            break;
        case milvus::IndexType::IVFFLAT:
            label = "IVFFLAT";
            break;
        case milvus::IndexType::IVFSQ8:
            label = "IVFSQ8";
            break;
        case milvus::IndexType::RNSG:
            label = "NSG";
            break;
        case milvus::IndexType::IVFSQ8H:
            label = "IVFSQ8H";
            break;
        case milvus::IndexType::IVFPQ:
            label = "IVFPQ";
            break;
        case milvus::IndexType::SPTAGKDT:
            label = "SPTAGKDT";
            break;
        case milvus::IndexType::SPTAGBKT:
            label = "SPTAGBKT";
            break;
        case milvus::IndexType::HNSW:
            label = "HNSW";
            break;
        case milvus::IndexType::ANNOY:
            label = "ANNOY";
            break;
        default:
            break;
    }
    return label;
}

void
G
groot 已提交
104
Utils::PrintCollectionParam(const milvus::CollectionParam& collection_param) {
G
groot 已提交
105
    BLOCK_SPLITER
G
groot 已提交
106 107 108 109
    std::cout << "Collection name: " << collection_param.collection_name << std::endl;
    std::cout << "Collection dimension: " << collection_param.dimension << std::endl;
    std::cout << "Collection index file size: " << collection_param.index_file_size << std::endl;
    std::cout << "Collection metric type: " << MetricTypeName(collection_param.metric_type) << std::endl;
G
groot 已提交
110 111 112 113 114 115
    BLOCK_SPLITER
}

// Prints the owning collection name and the partition tag to stdout,
// framed by separator lines.
void
Utils::PrintPartitionParam(const milvus::PartitionParam& partition_param) {
    BLOCK_SPLITER
    std::cout << "Collection name: " << partition_param.collection_name << std::endl
              << "Partition tag: " << partition_param.partition_tag << std::endl;
    BLOCK_SPLITER
}

// Prints the collection name, index type label and extra parameters of an index
// to stdout, framed by separator lines.
void
Utils::PrintIndexParam(const milvus::IndexParam& index_param) {
    BLOCK_SPLITER
    std::cout << "Index collection name: " << index_param.collection_name << std::endl
              << "Index type: " << IndexTypeName(index_param.index_type) << std::endl
              << "Index extra_params: " << index_param.extra_params << std::endl;
    BLOCK_SPLITER
}

void
G
groot 已提交
131 132
Utils::BuildEntities(int64_t from, int64_t to, std::vector<milvus::Entity>& entity_array,
                     std::vector<int64_t>& entity_ids, int64_t dimension) {
G
groot 已提交
133 134 135 136
    if (to <= from) {
        return;
    }

G
groot 已提交
137 138
    entity_array.clear();
    entity_ids.clear();
G
groot 已提交
139
    for (int64_t k = from; k < to; k++) {
G
groot 已提交
140 141
        milvus::Entity entity;
        entity.float_data.resize(dimension);
G
groot 已提交
142
        for (int64_t i = 0; i < dimension; i++) {
G
groot 已提交
143
            entity.float_data[i] = (float)(k % (i + 1));
G
groot 已提交
144 145
        }

G
groot 已提交
146 147
        entity_array.emplace_back(entity);
        entity_ids.push_back(k);
G
groot 已提交
148 149 150 151
    }
}

void
G
groot 已提交
152
Utils::PrintSearchResult(const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
153
                         const milvus::TopKQueryResult& topk_query_result) {
G
groot 已提交
154
    BLOCK_SPLITER
155
    std::cout << "Returned result count: " << topk_query_result.size() << std::endl;
G
groot 已提交
156

G
groot 已提交
157
    if (topk_query_result.size() != entity_array.size()) {
158
        std::cout << "ERROR: Returned result count not equal nq" << std::endl;
159 160 161 162 163 164
        return;
    }

    for (size_t i = 0; i < topk_query_result.size(); i++) {
        const milvus::QueryResult& one_result = topk_query_result[i];
        size_t topk = one_result.ids.size();
G
groot 已提交
165 166
        auto search_id = entity_array[i].first;
        std::cout << "No." << i << " entity " << search_id << " top " << topk << " search result:" << std::endl;
G
groot 已提交
167
        for (size_t j = 0; j < topk; j++) {
168
            std::cout << "\t" << one_result.ids[j] << "\t" << one_result.distances[j] << std::endl;
G
groot 已提交
169 170 171 172 173 174
        }
    }
    BLOCK_SPLITER
}

void
G
groot 已提交
175
Utils::CheckSearchResult(const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
176
                         const milvus::TopKQueryResult& topk_query_result) {
G
groot 已提交
177
    BLOCK_SPLITER
178
    size_t nq = topk_query_result.size();
G
groot 已提交
179
    for (size_t i = 0; i < nq; i++) {
180
        const milvus::QueryResult& one_result = topk_query_result[i];
G
groot 已提交
181
        auto search_id = entity_array[i].first;
G
groot 已提交
182 183 184 185 186 187 188 189 190 191 192

        uint64_t match_index = one_result.ids.size();
        for (uint64_t index = 0; index < one_result.ids.size(); index++) {
            if (search_id == one_result.ids[index]) {
                match_index = index;
                break;
            }
        }

        if (match_index >= one_result.ids.size()) {
            std::cout << "The topk result is wrong: not return search target in result set" << std::endl;
G
groot 已提交
193
        } else {
G
groot 已提交
194 195
            std::cout << "No." << i << " Check result successfully for target: " << search_id << " at top "
                      << match_index << std::endl;
G
groot 已提交
196 197 198 199 200 201
        }
    }
    BLOCK_SPLITER
}

void
G
groot 已提交
202
Utils::DoSearch(std::shared_ptr<milvus::Connection> conn, const std::string& collection_name,
G
groot 已提交
203
                const std::vector<std::string>& partition_tags, int64_t top_k, int64_t nprobe,
G
groot 已提交
204
                const std::vector<std::pair<int64_t, milvus::Entity>>& entity_array,
G
groot 已提交
205
                milvus::TopKQueryResult& topk_query_result) {
206
    topk_query_result.clear();
G
groot 已提交
207

G
groot 已提交
208 209 210
    std::vector<milvus::Entity> temp_entity_array;
    for (auto& pair : entity_array) {
        temp_entity_array.push_back(pair.second);
G
groot 已提交
211 212 213 214
    }

    {
        BLOCK_SPLITER
215
        JSON json_params = {{"nprobe", nprobe}};
G
groot 已提交
216
        milvus_sdk::TimeRecorder rc("search");
G
groot 已提交
217
        milvus::Status stat =
G
groot 已提交
218 219 220 221 222 223 224
            conn->Search(collection_name,
                         partition_tags,
                         temp_entity_array,
                         top_k,
                         json_params.dump(),
                         topk_query_result);
        std::cout << "Search function call status: " << stat.message() << std::endl;
G
groot 已提交
225 226 227
        BLOCK_SPLITER
    }

G
groot 已提交
228 229
    PrintSearchResult(entity_array, topk_query_result);
    CheckSearchResult(entity_array, topk_query_result);
G
groot 已提交
230 231
}

232 233
// Prints one partition's tag and entity count to stdout, followed by one line per
// segment with its name, entity count, index name and data size.
void
PrintPartitionStat(const milvus::PartitionStat& partition_stat) {
    std::cout << "\tPartition " << partition_stat.tag << " entity count: " << partition_stat.row_count
              << std::endl;

    for (const auto& segment : partition_stat.segments_stat) {
        std::cout << "\t\tsegment " << segment.segment_name;
        std::cout << " entity count: " << segment.row_count;
        std::cout << " index: " << segment.index_name;
        std::cout << " data size: " << segment.data_size << std::endl;
    }
}

void
G
groot 已提交
242
Utils::PrintCollectionInfo(const milvus::CollectionInfo& info) {
243
    BLOCK_SPLITER
G
groot 已提交
244
    std::cout << "Collection " << " total entity count: " << info.total_row_count << std::endl;
245 246 247 248 249 250 251
    for (const milvus::PartitionStat& partition_stat : info.partitions_stat) {
        PrintPartitionStat(partition_stat);
    }

    BLOCK_SPLITER
}

G
groot 已提交
252
}  // namespace milvus_sdk