test_compact.py 28.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
import copy
import logging
import pdb
import threading
import time
from multiprocessing import Pool, Process

import pytest

from utils import *

# Vector dimensionality handed to gen_vectors / gen_binary_vectors below.
dim = 128
# NOTE(review): not referenced in the visible part of this file.
index_file_size = 10
# Value passed to pytest.mark.timeout on the compact tests.
COMPACT_TIMEOUT = 180
# NOTE(review): not referenced in the visible part of this file.
nprobe = 1
top_k = 1
# Partition tag used by the partition test.
tag = "1970-01-01"
# Number of entities in the bulk data sets generated below.
nb = 6000
nq = 2
# NOTE(review): not referenced in the visible part of this file.
segment_row_count = 5000

# Shared test data, generated once when the module is imported.
entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()
default_binary_fields = gen_binary_default_fields()
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name

# Single-vector search clause against the float vector field (L2 metric).
_float_vector_clause = {
    "topk": 10,
    "query": gen_vectors(1, dim),
    "metric_type": "L2",
    "params": {"nprobe": 10},
}
default_single_query = {
    "bool": {
        "must": [
            {"vector": {field_name: _float_vector_clause}},
        ]
    }
}

# Single-vector search clause against the binary vector field (JACCARD metric).
_binary_vector_clause = {
    "topk": 10,
    "query": gen_binary_vectors(1, dim),
    "metric_type": "JACCARD",
    "params": {"nprobe": 10},
}
default_binary_single_query = {
    "bool": {
        "must": [
            {"vector": {binary_field_name: _binary_vector_clause}},
        ]
    }
}

default_query, default_query_vecs = gen_query_vectors(
    binary_field_name, binary_entities, top_k, nq
)
43

44

45 46 47 48 49
def ip_query():
    '''
    Build a search query equal to default_single_query but using the IP metric.

    A deep copy is taken so the shared module-level default_single_query is
    never mutated. `copy` is imported explicitly at the top of the file rather
    than relying on it leaking through `from utils import *`.

    return: a new query dict whose vector clause has "metric_type" set to "IP"
    '''
    query = copy.deepcopy(default_single_query)
    query["bool"]["must"][0]["vector"][field_name]["metric_type"] = "IP"
    return query

50 51 52 53 54 55 56 57

class TestCompactBase:
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact must reject a missing collection name
        method: call compact with None as the collection name
        expected: exception raised
        '''
        with pytest.raises(Exception):
            connect.compact(None)
67 68

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact a collection that was never created
        method: compact with a freshly generated unique name
        expected: exception raised
        '''
        missing_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(missing_name)
78 79 80
    
    @pytest.fixture(scope="function", params=gen_invalid_strs())
    def get_collection_name(self, request):
        '''
        Parametrized fixture: yields each value returned by gen_invalid_strs(),
        one per test invocation.
        '''
        invalid_name = request.param
        yield invalid_name

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact must reject invalid collection names
        method: compact with each name supplied by the get_collection_name fixture
        expected: exception raised
        '''
        invalid_name = get_collection_name
        with pytest.raises(Exception):
            connect.compact(invalid_name)
97
    
98
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: test add entity and compact
        method: insert a single entity, flush, then compact the collection
        expected: status ok, data_size of the first segment unchanged by compact
        '''
        inserted_ids = connect.insert(collection, entity)
        assert len(inserted_ids) == 1
        connect.flush([collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(collection)
        logging.getLogger().info(stats_before)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
120

121
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, collection):
        '''
        target: test add entities and compact
        method: insert the bulk entities, flush, then compact the collection
        expected: status ok, data_size of the first segment unchanged by compact
        '''
        connect.insert(collection, entities)
        connect.flush([collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact
        method: insert entities, delete the first and last one, then compact
        expected: status ok, partition data_size does not grow after compact
        '''
        log = logging.getLogger()
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])
        to_delete = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, to_delete)
        assert delete_status.OK()
        connect.flush([collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["data_size"]
        log.info(size_before)
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["data_size"]
        log.info(size_after)
        assert size_before >= size_after
170
    
D
del-zhenwu 已提交
171
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: test add entities, delete them and compact
        method: insert entities, delete every inserted id, then compact
        expected: status ok, first partition reports no segments after compact
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])
        delete_status = connect.delete_entity_by_id(collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([collection])
        # stats are requested before compaction as well; the result is not inspected
        connect.get_collection_stats(collection)
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(collection)
        logging.getLogger().info(stats_after["partitions"])
        assert not stats_after["partitions"][0]["segments"]
193

D
del-zhenwu 已提交
194 195 196
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: test add entities into partition, delete half of them and compact
        method: insert entities into a tagged partition, delete the first half
                of the returned ids and compact the collection
        expected: status ok, data_size of the partition's first segment shrinks
        '''
        log = logging.getLogger()
        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        assert len(ids) == nb
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])

        # Derive "half" from the inserted ids instead of a literal 3000, so the
        # test keeps matching its description if nb changes.
        delete_ids = ids[:len(ids) // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info_after = connect.get_collection_stats(collection)
        log.info(info_after["partitions"])
        # index 1 is read as the tagged partition
        # NOTE(review): presumably index 0 is the default partition - confirm
        size_before = info["partitions"][1]["segments"][0]["data_size"]
        size_after = info_after["partitions"][1]["segments"][0]["data_size"]
        assert size_before > size_after

222 223
    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Parametrized fixture returning each index config from gen_simple_index(),
        skipping configs the server mode cannot handle.

        GPU mode: skip index types that are NOT listed by ivf(). The original
        condition was `not ... not in ivf()`, a double negation that skipped
        exactly the supported types and contradicted the skip message.
        NOTE(review): the message mentions idmap too - confirm ivf() includes it.
        CPU mode: skip index types listed by index_cpu_not_support().
        '''
        mode = str(connect._cmd("mode"))
        index_type = request.param["index_type"]
        if mode == "GPU" and index_type not in ivf():
            pytest.skip("Only support index_type: idmap/ivf")
        if mode == "CPU" and index_type in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

235
    @pytest.mark.level(2)
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test compact collection after index created
        method: add entities, create index, delete part of entities and compact
        expected: status ok, data size of the first segment does not grow after compact
                  (the index description itself is not checked here)
        '''
        log = logging.getLogger()
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        log.info(info["partitions"])
        delete_ids = ids[:1500]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
262

263
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: test add entity and compact twice
        method: insert a single entity, then compact the collection two times
        expected: status ok, data_size of the first segment never changes
        '''
        connect.insert(collection, entity)
        connect.flush([collection])
        # stats snapshot before any compaction
        stats = connect.get_collection_stats(collection)
        size_initial = stats["partitions"][0]["segments"][0]["data_size"]
        first_status = connect.compact(collection)
        assert first_status.OK()
        connect.flush([collection])
        # stats snapshot after the first compaction
        stats = connect.get_collection_stats(collection)
        size_first = stats["partitions"][0]["segments"][0]["data_size"]
        assert size_initial == size_first
        second_status = connect.compact(collection)
        assert second_status.OK()
        # stats snapshot after the second compaction
        stats = connect.get_collection_stats(collection)
        size_second = stats["partitions"][0]["segments"][0]["data_size"]
        assert size_first == size_second

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: insert entities, delete the first and last one, compact two times
        expected: status ok, partition data_size does not grow after the first
                  compact and is unchanged by the second
        '''
        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])
        to_delete = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, to_delete)
        assert delete_status.OK()
        connect.flush([collection])
        # stats snapshot before any compaction
        stats = connect.get_collection_stats(collection)
        size_initial = stats["partitions"][0]["data_size"]
        first_status = connect.compact(collection)
        assert first_status.OK()
        # stats snapshot after the first compaction
        stats = connect.get_collection_stats(collection)
        size_first = stats["partitions"][0]["data_size"]
        assert size_initial >= size_first
        second_status = connect.compact(collection)
        assert second_status.OK()
        # stats snapshot after the second compaction
        stats = connect.get_collection_stats(collection)
        size_second = stats["partitions"][0]["data_size"]
        assert size_first == size_second

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 20 collections, insert entities into each, delete half
                of them, flush and compact in turn
        expected: status ok
        '''
        # Local names avoid shadowing the module-level nb / entities.
        entity_count = 100
        num_collections = 20
        batch = gen_entities(entity_count)
        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
        for collection_name in collection_list:
            ids = connect.insert(collection_name, batch)
            assert len(ids) == entity_count
            status = connect.delete_entity_by_id(collection_name, ids[:entity_count // 2])
            assert status.OK()
            # flush before compacting, as the binary variant of this test does
            connect.flush([collection_name])
            status = connect.compact(collection_name)
            assert status.OK()

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: test add entity after compact
        method: insert entities, compact, then insert one more entity
        expected: status ok, data_size unchanged by compact, entity count is nb+1
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
        # the collection must still accept inserts after compaction
        connect.insert(collection, entity)
        connect.flush([collection])
        total = connect.count_entities(collection)
        assert total == nb+1
362 363

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: test index creation after compact
        method: insert entities, delete the first ten, compact, then create index
        expected: status ok for delete, compact and create_index
        '''
        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])
        delete_status = connect.delete_entity_by_id(collection, inserted_ids[:10])
        assert delete_status.OK()
        connect.flush([collection])
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        index_status = connect.create_index(collection, field_name, get_simple_index)
        assert index_status.OK()
380 381

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: test delete entities after compact
        method: insert entities, compact, then delete every inserted id
        expected: status ok, entity count is 0
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        connect.flush([collection])
        delete_status = connect.delete_entity_by_id(collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([collection])
        remaining = connect.count_entities(collection)
        assert remaining == 0
398 399

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, collection):
        '''
        target: test search after compact
        method: insert entities, compact, then search with three vectors
        expected: one result per query vector; top distance is above epsilon for
                  the vector taken from `entity` and below epsilon for the two
                  vectors taken from `entities`
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        # one vector from the single-entity data set, then two from the inserted data
        probe_vectors = [
            entity[-1]["values"][0],
            entities[-1]["values"][0],
            entities[-1]["values"][-1],
        ]
        query = copy.deepcopy(default_single_query)
        query["bool"]["must"][0]["vector"][field_name]["query"] = probe_vectors
        results = connect.search(collection, query)
        logging.getLogger().debug(results)
        assert len(results) == len(probe_vectors)
        assert results[0]._distances[0] > epsilon
        assert results[1]._distances[0] < epsilon
        assert results[2]._distances[0] < epsilon
420 421


D
del-zhenwu 已提交
422
class TestCompactBinary:
423 424 425 426 427 428
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, binary_collection):
        '''
        target: test add binary vector and compact
        method: insert a single binary entity, flush, then compact the collection
        expected: status ok, data_size of the first segment unchanged by compact
        '''
        inserted_ids = connect.insert(binary_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([binary_collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(binary_collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(binary_collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
447

448
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, binary_collection):
        '''
        target: test add entities with binary vector and compact
        method: insert the bulk binary entities, flush, then compact
        expected: status ok, data_size of the first segment unchanged by compact
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(binary_collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(binary_collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact
        method: insert binary entities, delete the first and last one, then compact
        expected: status ok, partition data_size does not grow after compact
        '''
        log = logging.getLogger()
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        to_delete = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(binary_collection, to_delete)
        assert delete_status.OK()
        connect.flush([binary_collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(binary_collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["data_size"]
        log.info(size_before)
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(binary_collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["data_size"]
        log.info(size_after)
        assert size_before >= size_after
495
    
496
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_all_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete them and compact
        method: insert binary entities, delete every inserted id, then compact
        expected: status ok, first partition reports no segments after compact
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        status = connect.delete_entity_by_id(binary_collection, ids)
        assert status.OK()
        connect.flush([binary_collection])
        # stats are requested before compaction as well; the result is not inspected
        connect.get_collection_stats(binary_collection)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        # (the second `assert status.OK()` that followed this call only
        # re-checked the compact status, so it has been dropped)
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]
519

520
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entity and compact twice
        method: insert a single binary entity, then compact two times
        expected: status ok, data_size of the first segment never changes
        '''
        inserted_ids = connect.insert(binary_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([binary_collection])
        # stats snapshot before any compaction
        stats = connect.get_collection_stats(binary_collection)
        size_initial = stats["partitions"][0]["segments"][0]["data_size"]
        first_status = connect.compact(binary_collection)
        assert first_status.OK()
        # stats snapshot after the first compaction
        stats = connect.get_collection_stats(binary_collection)
        size_first = stats["partitions"][0]["segments"][0]["data_size"]
        assert size_initial == size_first
        second_status = connect.compact(binary_collection)
        assert second_status.OK()
        # stats snapshot after the second compaction
        stats = connect.get_collection_stats(binary_collection)
        size_second = stats["partitions"][0]["segments"][0]["data_size"]
        assert size_first == size_second

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: insert binary entities, delete the first and last one, compact two times
        expected: status ok, partition data_size does not grow after the first
                  compact and is unchanged by the second
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        to_delete = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(binary_collection, to_delete)
        assert delete_status.OK()
        connect.flush([binary_collection])
        # stats snapshot before any compaction
        stats = connect.get_collection_stats(binary_collection)
        size_initial = stats["partitions"][0]["data_size"]
        first_status = connect.compact(binary_collection)
        assert first_status.OK()
        # stats snapshot after the first compaction
        stats = connect.get_collection_stats(binary_collection)
        size_first = stats["partitions"][0]["data_size"]
        assert size_initial >= size_first
        second_status = connect.compact(binary_collection)
        assert second_status.OK()
        # stats snapshot after the second compaction
        stats = connect.get_collection_stats(binary_collection)
        size_second = stats["partitions"][0]["data_size"]
        assert size_first == size_second

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 10 binary collections, then for each one insert entities,
                delete the first and last, flush, compact and drop it
        expected: status ok
        '''
        entity_count = 100
        num_collections = 10
        _, batch = gen_binary_entities(entity_count)
        collection_list = []
        for i in range(num_collections):
            name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(name)
            connect.create_collection(name, default_binary_fields)
        for name in collection_list:
            inserted_ids = connect.insert(name, batch)
            assert len(inserted_ids) == entity_count
            delete_status = connect.delete_entity_by_id(name, [inserted_ids[0], inserted_ids[-1]])
            assert delete_status.OK()
            connect.flush([name])
            compact_status = connect.compact(name)
            assert compact_status.OK()
            drop_status = connect.drop_collection(name)
            assert drop_status.OK()
601

602
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, binary_collection):
        '''
        target: test add entity after compact
        method: insert binary entities, compact, then insert one more entity
        expected: status ok, data_size unchanged by compact, entity count is nb + 1
        '''
        connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        # stats snapshot before compaction
        stats_before = connect.get_collection_stats(binary_collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # stats snapshot after compaction
        stats_after = connect.get_collection_stats(binary_collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
        # the collection must still accept inserts after compaction
        connect.insert(binary_collection, binary_entity)
        connect.flush([binary_collection])
        total = connect.count_entities(binary_collection)
        assert total == nb + 1
625 626

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_delete_entities_after_compact(self, connect, binary_collection):
        '''
        target: test delete entities after compact
        method: insert binary entities, compact, then delete every inserted id
        expected: status ok, entity count is 0
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        connect.flush([binary_collection])
        delete_status = connect.delete_entity_by_id(binary_collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([binary_collection])
        remaining = connect.count_entities(binary_collection)
        assert remaining == 0
643

644
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, binary_collection):
        '''
        target: test search after compact
        method: insert binary entities, compact, then search with the first and
                last inserted vectors
        expected: top distance of the first result is within epsilon of the
                  jaccard distance of raw_vectors[0] to itself
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # reference distance: raw_vectors[0] compared against itself
        reference = raw_vectors[0]
        distance = jaccard(reference, raw_vectors[0])
        probe_vectors = [
            binary_entities[-1]["values"][0],
            binary_entities[-1]["values"][-1],
        ]
        query = copy.deepcopy(default_binary_single_query)
        query["bool"]["must"][0]["vector"][binary_field_name]["query"] = probe_vectors

        results = connect.search(binary_collection, query)
        assert abs(results[0]._distances[0]-distance) <= epsilon
665 666

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search with the IP metric after compact
        method: insert entities, compact, then search with three vectors using
                the query built by ip_query()
        expected: compact status ok; one result per query vector; top distance is
                  below 1 - epsilon for the vector taken from `entity` and above
                  1 - epsilon for the two vectors taken from `entities`
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        # The status was previously assigned but never checked, so a failed
        # compaction could go unnoticed here.
        assert status.OK()
        query_vectors = [
            entity[-1]["values"][0],
            entities[-1]["values"][0],
            entities[-1]["values"][-1],
        ]
        query = ip_query()
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] < 1 - epsilon
        assert res[1]._distances[0] > 1 - epsilon
        assert res[2]._distances[0] > 1 - epsilon