// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "Status.h"

/** \brief Milvus SDK namespace
 */
namespace milvus {
/**
 * @brief Index Type
 *
 * Every enumerator carries an explicit numeric value.
 * Note that the values are not contiguous: SPTAGBKT is 8 and HNSW is 11.
 */
enum class IndexType {
    INVALID = 0,
    FLAT = 1,
    IVFFLAT = 2,
    IVFSQ8 = 3,
    RNSG = 4,
    IVFSQ8H = 5,
    IVFPQ = 6,
    SPTAGKDT = 7,
    SPTAGBKT = 8,
    HNSW = 11,
};

/**
 * @brief Metric Type
 *
 * Distance/similarity measure used when comparing vectors.
 */
enum class MetricType {
    L2 = 1,        // Euclidean Distance
    IP = 2,        // Inner Product (matches cosine similarity only for normalized vectors)
    HAMMING = 3,   // Hamming Distance
    JACCARD = 4,   // Jaccard Distance
    TANIMOTO = 5,  // Tanimoto Distance
};

/**
 * @brief Connect API parameter
 *
 * Both fields are carried as strings; the port is not a numeric type.
 */
struct ConnectParam {
    std::string ip_address;  ///< Server IP address
    std::string port;        ///< Server PORT
};

/**
G
groot 已提交
57
 * @brief Collection parameters
J
jinhai 已提交
58
 */
G
groot 已提交
59 60
struct CollectionParam {
    std::string collection_name;              ///< Collection_name name
S
starlord 已提交
61
    int64_t dimension = 0;                    ///< Vector dimension, must be a positive value
62
    int64_t index_file_size = 1024;           ///< Index file size, must be a positive value, unit: MB
S
starlord 已提交
63
    MetricType metric_type = MetricType::L2;  ///< Index metric type
J
jinhai 已提交
64 65 66
};

/**
 * @brief Entity inserted, currently each entity represents a vector
 *
 * Holds the raw vector either as floats or as bytes; both containers
 * are empty by default.
 */
struct Entity {
    std::vector<float> float_data;     ///< Vector raw float data
    std::vector<uint8_t> binary_data;  ///< Vector raw binary data
};

/**
 * @brief TopK query result
 *
 * Holds the ids and distances returned by a query as two separate arrays.
 */
struct QueryResult {
    std::vector<int64_t> ids;      ///< Query entity ids result
    std::vector<float> distances;  ///< Query distances result
};
using TopKQueryResult = std::vector<QueryResult>;  ///< Topk query result

Y
Yu Kun 已提交
83
/**
84 85 86
 * @brief Index parameters
 * Note: extra_params is extra parameters list, it must be json format
 *       For different index type, parameter list is different accordingly, for example:
G
typo  
groot 已提交
87
 *       FLAT/IVFLAT/SQ8:  {nlist: 16384}
G
groot 已提交
88
 *           ///< nlist range:[1, 999999]
G
typo  
groot 已提交
89
 *       IVFPQ:  {nlist: 16384, m: 12}
G
groot 已提交
90 91
 *           ///< nlist range:[1, 999999]
 *           ///< m is decided by dim and have a couple of results.
G
typo  
groot 已提交
92
 *       NSG:  {search_length: 45, out_degree:50, candidate_pool_size:300, knng:100}
G
groot 已提交
93 94 95 96
 *           ///< search_length range:[10, 300]
 *           ///< out_degree range:[5, 300]
 *           ///< candidate_pool_size range:[50, 1000]
 *           ///< knng range:[5, 300]
G
typo  
groot 已提交
97
 *       HNSW  {M: 16, efConstruction:300}
G
groot 已提交
98
 *           ///< M range:[5, 48]
G
typo  
groot 已提交
99
 *           ///< efConstruction range:[100, 500]
Y
Yu Kun 已提交
100 101
 */
struct IndexParam {
G
groot 已提交
102
    std::string collection_name;        ///< Collection name for create index
103 104
    IndexType index_type;               ///< Index type
    std::string extra_params;           ///< Extra parameters according to different index type, must be json format
Y
Yu Kun 已提交
105
};
G
groot 已提交
106

/**
 * @brief partition parameters
 *
 * Identifies a partition by collection name and partition tag.
 */
struct PartitionParam {
    std::string collection_name;  ///< Collection name
    std::string partition_tag;    ///< Partition tag
};

/**
 * @brief List of partition tags
 */
using PartitionTagList = std::vector<std::string>;

/**
 * @brief segment statistics
 *
 * Numeric fields default to 0 so a default-constructed object never
 * exposes indeterminate values.
 */
struct SegmentStat {
    std::string segment_name;    ///< Segment name
    int64_t row_count = 0;       ///< Segment row count
    std::string index_name;      ///< Segment index name
    int64_t data_size = 0;       ///< Segment data size
};

/**
 * @brief partition statistics
 *
 * row_count defaults to 0 so a default-constructed object never
 * exposes an indeterminate value.
 */
struct PartitionStat {
    std::string tag;                          ///< Partition tag
    int64_t row_count = 0;                    ///< Partition row count
    std::vector<SegmentStat> segments_stat;   ///< Partition's segments statistics
};

/**
G
groot 已提交
137
 * @brief collection info
138
 */
G
groot 已提交
139 140 141
struct CollectionInfo {
    int64_t total_row_count;                      ///< Collection total entity count
    std::vector<PartitionStat> partitions_stat;   ///< Collection's partitions statistics
142
};
G
groot 已提交
143

J
jinhai 已提交
144 145 146 147
/**
 * @brief SDK main class
 */
class Connection {
K
kun yu 已提交
148
 public:
J
jinhai 已提交
149
    /**
G
groot 已提交
150
     * @brief Create connection
J
jinhai 已提交
151 152 153
     *
     * Create a connection instance and return it's shared pointer
     *
G
groot 已提交
154
     * @return connection instance pointer
J
jinhai 已提交
155 156 157 158 159 160
     */

    static std::shared_ptr<Connection>
    Create();

    /**
G
groot 已提交
161
     * @brief Destroy connection
J
jinhai 已提交
162 163 164 165 166 167 168 169 170
     *
     * Destroy the connection instance
     *
     * @param connection, the shared pointer to the instance to be destroyed
     *
     * @return if destroy is successful
     */

    static Status
S
starlord 已提交
171
    Destroy(std::shared_ptr<Connection>& connection_ptr);
J
jinhai 已提交
172 173 174 175

    /**
     * @brief Connect
     *
176 177
     * This method is used to connect server.
     * Connect function should be called before any operations.
J
jinhai 已提交
178 179 180 181 182 183
     *
     * @param param, use to provide server information
     *
     * @return Indicate if connect is successful
     */

Y
Yu Kun 已提交
184
    virtual Status
G
groot 已提交
185
    Connect(const ConnectParam& connect_param) = 0;
J
jinhai 已提交
186 187 188 189

    /**
     * @brief Connect
     *
190 191
     * This method is used to connect server.
     * Connect function should be called before any operations.
J
jinhai 已提交
192
     *
193
     * @param uri, use to provide server uri, example: milvus://ipaddress:port
J
jinhai 已提交
194 195 196
     *
     * @return Indicate if connect is successful
     */
Y
Yu Kun 已提交
197
    virtual Status
S
starlord 已提交
198
    Connect(const std::string& uri) = 0;
J
jinhai 已提交
199 200

    /**
G
groot 已提交
201
     * @brief Connected
J
jinhai 已提交
202
     *
203
     * This method is used to test whether server is connected.
J
jinhai 已提交
204 205 206
     *
     * @return Indicate if connection status
     */
Y
Yu Kun 已提交
207 208
    virtual Status
    Connected() const = 0;
J
jinhai 已提交
209 210 211 212

    /**
     * @brief Disconnect
     *
213
     * This method is used to disconnect server.
J
jinhai 已提交
214 215 216
     *
     * @return Indicate if disconnect is successful
     */
Y
Yu Kun 已提交
217 218
    virtual Status
    Disconnect() = 0;
J
jinhai 已提交
219 220

    /**
G
groot 已提交
221
     * @brief Get the client version
G
groot 已提交
222 223 224 225 226 227 228 229 230
     *
     * This method is used to give the client version.
     *
     * @return Client version.
     */
    virtual std::string
    ClientVersion() const = 0;

    /**
G
groot 已提交
231
     * @brief Get the server version
G
groot 已提交
232 233 234 235 236 237 238 239
     *
     * This method is used to give the server version.
     *
     * @return Server version.
     */
    virtual std::string
    ServerVersion() const = 0;

G
groot 已提交
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    /**
     * @brief Get the server status
     *
     * This method is used to give the server status.
     *
     * @return Server status.
     */
    virtual std::string
    ServerStatus() const = 0;

    /**
     * @brief Get config method
     *
     * This method is used to set config.
     *
     * @param node_name, config node name.
     * @param value, config value.
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
    GetConfig(const std::string& node_name, std::string& value) const = 0;

    /**
     * @brief Set config method
     *
     * This method is used to set config.
     *
     * @param node_name, config node name.
     * @param value, config value.
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
    SetConfig(const std::string& node_name, const std::string& value) const = 0;

G
groot 已提交
276 277
    /**
     * @brief Create collection method
J
jinhai 已提交
278
     *
G
groot 已提交
279
     * This method is used to create collection.
J
jinhai 已提交
280
     *
G
groot 已提交
281
     * @param param, use to provide collection information to be created.
J
jinhai 已提交
282
     *
G
groot 已提交
283
     * @return Indicate if collection is created successfully
J
jinhai 已提交
284
     */
Y
Yu Kun 已提交
285
    virtual Status
G
groot 已提交
286
    CreateCollection(const CollectionParam& param) = 0;
J
jinhai 已提交
287

G
groot 已提交
288
    /**
G
groot 已提交
289
     * @brief Test collection existence method
G
groot 已提交
290
     *
G
groot 已提交
291
     * This method is used to test collection existence.
G
groot 已提交
292
     *
G
groot 已提交
293
     * @param collection_name, target collection's name.
G
groot 已提交
294
     *
G
groot 已提交
295
     * @return Indicate if collection is cexist
G
groot 已提交
296
     */
Y
Yu Kun 已提交
297
    virtual bool
G
groot 已提交
298
    HasCollection(const std::string& collection_name) = 0;
G
groot 已提交
299

J
jinhai 已提交
300
    /**
G
groot 已提交
301
     * @brief Drop collection method
J
jinhai 已提交
302
     *
G
groot 已提交
303
     * This method is used to drop collection(and its partitions).
J
jinhai 已提交
304
     *
G
groot 已提交
305
     * @param collection_name, target collection's name.
J
jinhai 已提交
306
     *
G
groot 已提交
307
     * @return Indicate if collection is drop successfully.
J
jinhai 已提交
308
     */
Y
Yu Kun 已提交
309
    virtual Status
G
groot 已提交
310
    DropCollection(const std::string& collection_name) = 0;
K
kun yu 已提交
311

312
    /**
Y
Yu Kun 已提交
313
     * @brief Create index method
314
     *
G
groot 已提交
315
     * This method is used to create index for whole collection(and its partitions).
316
     *
G
groot 已提交
317
     * @param index_param, use to provide index information to be created.
318
     *
G
groot 已提交
319
     * @return Indicate if create index successfully.
320
     */
Y
Yu Kun 已提交
321
    virtual Status
S
starlord 已提交
322
    CreateIndex(const IndexParam& index_param) = 0;
323

J
jinhai 已提交
324
    /**
G
groot 已提交
325
     * @brief Insert entity to collection
J
jinhai 已提交
326
     *
G
groot 已提交
327
     * This method is used to insert vector array to collection.
J
jinhai 已提交
328
     *
G
groot 已提交
329
     * @param collection_name, target collection's name.
330
     * @param partition_tag, target partition's tag, keep empty if no partition specified.
G
groot 已提交
331
     * @param entity_array, entity array is inserted, each entitu represent a vector.
332
     * @param id_array,
G
groot 已提交
333 334
     *  specify id for each entity,
     *  if this array is empty, milvus will generate unique id for each entity,
335
     *  and return all ids by this parameter.
J
jinhai 已提交
336
     *
G
groot 已提交
337
     * @return Indicate if entity array are inserted successfully
J
jinhai 已提交
338
     */
Y
Yu Kun 已提交
339
    virtual Status
G
groot 已提交
340 341 342
    Insert(const std::string& collection_name,
           const std::string& partition_tag,
           const std::vector<Entity>& entity_array,
S
starlord 已提交
343
           std::vector<int64_t>& id_array) = 0;
J
jinhai 已提交
344

345
    /**
G
groot 已提交
346
     * @brief Get entity data by id
347
     *
G
groot 已提交
348 349
     * This method is used to get entity data by id from a collection.
     * Return the first found entity if there are entities with duplicated id
350
     *
G
groot 已提交
351 352 353
     * @param collection_name, target collection's name.
     * @param entity_id, target entity id.
     * @param entity_data, returned entity data.
354 355 356 357
     *
     * @return Indicate if the operation is succeed.
     */
    virtual Status
G
groot 已提交
358
    GetEntityByID(const std::string& collection_name, int64_t entity_id, Entity& entity_data) = 0;
359 360

    /**
G
groot 已提交
361
     * @brief Get entity ids from a segment
362
     *
G
groot 已提交
363 364
     * This method is used to get entity ids from a segment
     * Return all entity(not deleted) ids
365
     *
G
groot 已提交
366
     * @param collection_name, target collection's name.
367
     * @param segment_name, target segment name.
G
groot 已提交
368
     * @param id_array, returned entity id array.
369 370 371 372
     *
     * @return Indicate if the operation is succeed.
     */
    virtual Status
G
groot 已提交
373 374 375
    GetIDsInSegment(const std::string& collection_name,
                    const std::string& segment_name,
                    std::vector<int64_t>& id_array) = 0;
376

J
jinhai 已提交
377
    /**
G
groot 已提交
378
     * @brief Search entities in a collection
J
jinhai 已提交
379
     *
G
groot 已提交
380
     * This method is used to query entity in collection.
J
jinhai 已提交
381
     *
G
groot 已提交
382
     * @param collection_name, target collection's name.
383
     * @param partition_tag_array, target partitions, keep empty if no partition specified.
G
groot 已提交
384 385
     * @param query_entity_array, vectors to be queried.
     * @param topk, how many similarity entities will be returned.
386 387 388
     * @param extra_params, extra search parameters according to different index type, must be json format.
     * Note: extra_params is extra parameters list, it must be json format, for example:
     *       For different index type, parameter list is different accordingly
G
typo  
groot 已提交
389
     *       FLAT/IVFLAT/SQ8/IVFPQ:  {nprobe: 32}
G
groot 已提交
390
     *           ///< nprobe range:[1,999999]
G
typo  
groot 已提交
391
     *       NSG:  {search_length:100}
G
groot 已提交
392
     *           ///< search_length range:[10, 300]
G
typo  
groot 已提交
393 394
     *       HNSW  {ef: 64}
     *           ///< ef range:[topk, 4096]
395
     * @param topk_query_result, result array.
J
jinhai 已提交
396 397 398
     *
     * @return Indicate if query is successful.
     */
Y
Yu Kun 已提交
399
    virtual Status
G
groot 已提交
400 401
    Search(const std::string& collection_name, const PartitionTagList& partition_tag_array,
           const std::vector<Entity>& entity_array, int64_t topk,
402
           const std::string& extra_params, TopKQueryResult& topk_query_result) = 0;
J
jinhai 已提交
403 404

    /**
G
groot 已提交
405
     * @brief Show collection description
J
jinhai 已提交
406
     *
G
groot 已提交
407
     * This method is used to show collection information.
J
jinhai 已提交
408
     *
G
groot 已提交
409
     * @param collection_name, target collection's name.
G
groot 已提交
410
     * @param collection_param, collection_param is given when operation is successful.
J
jinhai 已提交
411 412 413
     *
     * @return Indicate if this operation is successful.
     */
Y
Yu Kun 已提交
414
    virtual Status
G
groot 已提交
415
    DescribeCollection(const std::string& collection_name, CollectionParam& collection_param) = 0;
J
jinhai 已提交
416

G
groot 已提交
417
    /**
G
groot 已提交
418
     * @brief Get collection entity count
G
groot 已提交
419
     *
G
groot 已提交
420
     * This method is used to get collection entity count.
G
groot 已提交
421
     *
G
groot 已提交
422 423
     * @param collection_name, target collection's name.
     * @param entity_count, collection total entity count(including partitions).
G
groot 已提交
424 425 426
     *
     * @return Indicate if this operation is successful.
     */
Y
Yu Kun 已提交
427
    virtual Status
G
groot 已提交
428
    CountCollection(const std::string& collection_name, int64_t& entity_count) = 0;
G
groot 已提交
429

J
jinhai 已提交
430
    /**
G
groot 已提交
431
     * @brief Show all collections in database
J
jinhai 已提交
432
     *
G
groot 已提交
433
     * This method is used to list all collections.
J
jinhai 已提交
434
     *
G
groot 已提交
435
     * @param collection_array, all collections in database.
J
jinhai 已提交
436 437 438
     *
     * @return Indicate if this operation is successful.
     */
Y
Yu Kun 已提交
439
    virtual Status
G
groot 已提交
440
    ShowCollections(std::vector<std::string>& collection_array) = 0;
J
jinhai 已提交
441

442
    /**
G
groot 已提交
443
     * @brief Show collection information
444
     *
G
groot 已提交
445
     * This method is used to get detail information of a collection.
446
     *
G
groot 已提交
447 448
     * @param collection_name, target collection's name.
     * @param collection_info, target collection's information
449 450 451 452
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
453
    ShowCollectionInfo(const std::string& collection_name, CollectionInfo& collection_info) = 0;
Y
Yu Kun 已提交
454 455

    /**
G
groot 已提交
456
     * @brief Delete entity by id
Y
Yu Kun 已提交
457
     *
G
groot 已提交
458
     * This method is used to delete entity by id.
Y
Yu Kun 已提交
459
     *
G
groot 已提交
460 461
     * @param collection_name, target collection's name.
     * @param id_array, entity id array to be deleted.
Y
Yu Kun 已提交
462 463 464 465
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
466
    DeleteByID(const std::string& collection_name, const std::vector<int64_t>& id_array) = 0;
Y
Yu Kun 已提交
467 468

    /**
G
groot 已提交
469
     * @brief Preload collection
Y
Yu Kun 已提交
470
     *
G
groot 已提交
471
     * This method is used to preload collection data into memory
Y
Yu Kun 已提交
472
     *
G
groot 已提交
473
     * @param collection_name, target collection's name.
Y
Yu Kun 已提交
474 475 476 477
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
478
    PreloadCollection(const std::string& collection_name) const = 0;
Y
Yu Kun 已提交
479 480

    /**
G
groot 已提交
481
     * @brief Describe index
Y
Yu Kun 已提交
482 483 484
     *
     * This method is used to describe index
     *
G
groot 已提交
485
     * @param collection_name, target collection's name.
G
groot 已提交
486
     * @param index_param, returned index information.
Y
Yu Kun 已提交
487
     *
G
groot 已提交
488
     * @return Indicate if this operation is successful.
Y
Yu Kun 已提交
489
     */
490
    virtual Status
G
groot 已提交
491
    DescribeIndex(const std::string& collection_name, IndexParam& index_param) const = 0;
Y
Yu Kun 已提交
492 493

    /**
G
groot 已提交
494
     * @brief Drop index
Y
Yu Kun 已提交
495
     *
G
groot 已提交
496
     * This method is used to drop index of collection(and its partitions)
Y
Yu Kun 已提交
497
     *
G
groot 已提交
498
     * @param collection_name, target collection's name.
Y
Yu Kun 已提交
499 500 501 502
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
503
    DropIndex(const std::string& collection_name) const = 0;
G
groot 已提交
504 505 506 507

    /**
     * @brief Create partition method
     *
G
groot 已提交
508
     * This method is used to create collection's partition
G
groot 已提交
509
     *
G
groot 已提交
510
     * @param partition_param, use to provide partition information to be created.
G
groot 已提交
511 512 513 514
     *
     * @return Indicate if partition is created successfully
     */
    virtual Status
G
groot 已提交
515
    CreatePartition(const PartitionParam& partition_param) = 0;
G
groot 已提交
516 517

    /**
G
groot 已提交
518
     * @brief Show all partitions method
G
groot 已提交
519
     *
G
groot 已提交
520
     * This method is used to show all partitions(return their tags)
G
groot 已提交
521
     *
G
groot 已提交
522 523
     * @param collection_name, target collection's name.
     * @param partition_tag_array, partition tag array of the collection.
G
groot 已提交
524 525 526 527
     *
     * @return Indicate if this operation is successful
     */
    virtual Status
G
groot 已提交
528
    ShowPartitions(const std::string& collection_name, PartitionTagList& partition_tag_array) const = 0;
G
groot 已提交
529 530 531 532

    /**
     * @brief Delete partition method
     *
G
groot 已提交
533
     * This method is used to delete collection's partition.
G
groot 已提交
534
     *
G
groot 已提交
535
     * @param partition_param, target partition to be deleted.
G
groot 已提交
536 537 538 539
     *
     * @return Indicate if partition is delete successfully.
     */
    virtual Status
G
groot 已提交
540
    DropPartition(const PartitionParam& partition_param) = 0;
541 542

    /**
G
groot 已提交
543
     * @brief Flush collection buffer into storage
544
     *
G
groot 已提交
545
     * This method is used to flush collection buffer into storage
546
     *
G
groot 已提交
547
     * @param collection_name, target collection's name.
548 549 550 551
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
552
    FlushCollection(const std::string& collection_name) = 0;
553 554

    /**
G
groot 已提交
555
     * @brief Flush all buffer into storage
556
     *
G
groot 已提交
557
     * This method is used to all collection buffer into storage
558 559 560 561 562 563 564
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
    Flush() = 0;

    /**
G
groot 已提交
565
     * @brief Compact collection, permanently remove deleted vectors
566
     *
G
groot 已提交
567
     * This method is used to compact collection
568
     *
G
groot 已提交
569
     * @param collection_name, target collection's name.
570 571 572 573
     *
     * @return Indicate if this operation is successful.
     */
    virtual Status
G
groot 已提交
574
    CompactCollection(const std::string& collection_name) = 0;
J
jinhai 已提交
575 576
};

S
starlord 已提交
577
}  // namespace milvus