test_compact.py 30.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
import time
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from utils import *

# Scalar settings shared by the compact tests.
dim = 128
index_file_size = 10
# Value handed to pytest.mark.timeout on the test cases below.
COMPACT_TIMEOUT = 180
nprobe = 1
top_k = 1
# Partition tag used by the partition test.
tag = "1970-01-01"
# Number of entities in the bulk fixtures built below.
nb = 6000
nq = 2
segment_row_count = 5000

# Pre-generated data: one entity and a batch of nb entities, float and binary.
entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)

# Collection schemas and the vector field names the queries target.
default_fields = gen_default_fields()
default_binary_fields = gen_binary_default_fields()
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name

# Single-vector L2 query against the float vector field.
default_single_query = {
    "bool": {
        "must": [
            {
                "vector": {
                    field_name: {
                        "topk": 10,
                        "query": gen_vectors(1, dim),
                        "metric_type": "L2",
                        "params": {"nprobe": 10},
                    }
                }
            }
        ]
    }
}

# Single-vector JACCARD query against the binary vector field.
default_binary_single_query = {
    "bool": {
        "must": [
            {
                "vector": {
                    binary_field_name: {
                        "topk": 10,
                        "query": gen_binary_vectors(1, dim),
                        "metric_type": "JACCARD",
                        "params": {"nprobe": 10},
                    }
                }
            }
        ]
    }
}

default_query, default_query_vecs = gen_query_vectors(
    binary_field_name, binary_entities, top_k, nq
)
43

44

45 46 47 48 49
def ip_query():
    '''
    Build a query equal to default_single_query except that the vector
    clause uses the "IP" metric type. The module-level query is deep-copied,
    so it is left untouched.
    '''
    ip_variant = copy.deepcopy(default_single_query)
    vector_clause = ip_variant["bool"]["must"][0]["vector"][field_name]
    vector_clause["metric_type"] = "IP"
    return ip_variant

50 51 52 53 54 55 56 57

class TestCompactBase:
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
58
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact must reject a collection name of None
        method: call compact with None as the collection name
        expected: an exception is raised
        '''
        with pytest.raises(Exception):
            connect.compact(None)
67 68

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
69
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact must reject a collection that was never created
        method: call compact with a freshly generated unique name
        expected: an exception is raised
        '''
        missing_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(missing_name)
78 79 80
    
    @pytest.fixture(
        scope="function",
81
        params=gen_invalid_strs()
82
    )
X
Xiaohai Xu 已提交
83
    def get_collection_name(self, request):
        '''
        Yield the current fixture parameter; the fixture is parametrized
        with gen_invalid_strs().
        '''
        invalid_name = request.param
        yield invalid_name

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
87
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact must reject an invalid collection name
        method: call compact with each value supplied by get_collection_name
        expected: an exception is raised
        '''
        bad_name = get_collection_name
        with pytest.raises(Exception):
            connect.compact(bad_name)
97
    
98
    @pytest.mark.level(2)
99
    @pytest.mark.timeout(COMPACT_TIMEOUT)
100
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: compact a collection holding a single entity
        method: insert one entity, flush, compact
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        inserted_ids = connect.insert(collection, entity)
        assert len(inserted_ids) == 1
        connect.flush([collection])

        # Segment size before compaction.
        stats_before = connect.get_collection_stats(collection)
        logging.getLogger().info(stats_before)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        # Segment size after compaction.
        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
120

121
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
122
    def test_insert_and_compact(self, connect, collection):
        '''
        target: compact a collection holding a batch of entities
        method: insert the shared entities, flush, compact
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entities)
        connect.flush([collection])

        size_before = first_segment_size()
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        size_after = first_segment_size()

        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
144
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: compact after deleting a few entities
        method: insert entities, delete the first and last id, flush, compact
        expected: compact succeeds and data_size does not grow
        '''
        log = logging.getLogger()

        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        doomed = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, doomed)
        assert delete_status.OK()
        connect.flush([collection])

        # Segment size before compaction.
        stats_before = connect.get_collection_stats(collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        # Segment size after compaction.
        stats_after = connect.get_collection_stats(collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)

        assert size_before >= size_after
170
    
171 172
    # TODO
    @pytest.mark.skip("not implement")
173
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
174
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: compact after deleting every entity
        method: insert entities, delete all of them, flush, compact
        expected: compact succeeds and the first partition reports no segments
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])

        status = connect.delete_entity_by_id(collection, ids)
        assert status.OK()
        connect.flush([collection])

        # No stats fetch before compact: nothing is compared against the
        # pre-compact state in this test.
        status = connect.compact(collection)
        assert status.OK()

        # Collection info after compact: the segment list must be empty.
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]
194

D
del-zhenwu 已提交
195 196 197
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: compact after deleting half of the entities stored in a partition
        method: create a partition, insert entities into it, delete the first
                half of the returned ids, flush, compact
        expected: compact succeeds and the partition's first segment has a
                  strictly smaller data_size afterwards
        '''
        log = logging.getLogger()

        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])

        # Derive "half" from the inserted ids rather than a fixed 3000, so the
        # test keeps matching its name if the module-level nb changes.
        delete_ids = ids[:len(ids) // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])

        # Collection info before compact.
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()

        # Collection info after compact. Index 1 is the partition compared
        # here (presumably the one created above -- confirm stats ordering).
        info_after = connect.get_collection_stats(collection)
        log.info(info_after["partitions"])
        size_before = info["partitions"][1]["segments"][0]["data_size"]
        size_after = info_after["partitions"][1]["segments"][0]["data_size"]
        assert size_before > size_after

223 224
    @pytest.fixture(
        scope="function",
225
        params=gen_simple_index()
226
    )
227
    def get_simple_index(self, request, connect):
        '''
        Return the parametrized index config, skipping the test when the
        server mode reported by connect._cmd("mode") does not support it.

        GPU mode: skip index types that are not listed by ivf().
        CPU mode: skip index types listed by index_cpu_not_support().
        Any other mode: no filtering.
        '''
        # Ask the server once; both checks use the same answer.
        mode = str(connect._cmd("mode"))
        # Skip when the type is NOT in ivf(). The previous guard,
        # `not x not in ivf()`, parses as `x in ivf()` and therefore skipped
        # exactly the types the skip message says are supported.
        # NOTE(review): the message also mentions idmap -- confirm whether
        # ivf() includes the flat index type.
        if mode == "GPU" and request.param["index_type"] not in ivf():
            pytest.skip("Only support index_type: idmap/ivf")
        if mode == "CPU" and request.param["index_type"] in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

236
    @pytest.mark.level(2)
X
Xiaohai Xu 已提交
237
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: compact a collection after an index has been created
        method: insert entities, create an index, delete the first 1500 ids,
                flush, compact
        expected: compact succeeds and the first segment's data_size does not
                  grow (the index description itself is not checked here)
        '''
        log = logging.getLogger()

        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])

        # Collection info before compact.
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        log.info(info["partitions"])

        delete_ids = ids[:1500]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])

        status = connect.compact(collection)
        assert status.OK()

        # Collection info after compact.
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
263

264
    @pytest.mark.timeout(COMPACT_TIMEOUT)
265
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: compact the same single-entity collection twice
        method: insert one entity, flush, compact, flush, compact again
        expected: both compacts succeed and data_size never changes
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entity)
        connect.flush([collection])
        size_before = first_segment_size()

        # First compaction.
        first_status = connect.compact(collection)
        assert first_status.OK()
        connect.flush([collection])
        size_after = first_segment_size()
        assert size_before == size_after

        # Second compaction.
        second_status = connect.compact(collection)
        assert second_status.OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
291
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: compact twice after deleting a few entities
        method: insert entities, delete the first and last id, flush,
                compact, then compact again
        expected: both compacts succeed; data_size does not grow on the first
                  and is unchanged by the second
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])

        doomed = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, doomed)
        assert delete_status.OK()
        connect.flush([collection])
        size_before = first_segment_size()

        # First compaction.
        first_status = connect.compact(collection)
        assert first_status.OK()
        size_after = first_segment_size()
        assert size_before >= size_after

        # Second compaction.
        second_status = connect.compact(collection)
        assert second_status.OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
320
    def test_compact_multi_collections(self, connect):
        '''
        target: compact works across multiple collections
        method: create 20 collections, insert entities into each, delete half
                of them, flush and compact each collection in turn
        expected: every delete and compact returns an OK status
        '''
        # Local names chosen so they do not shadow the module-level
        # nb / entities fixtures.
        row_count = 100
        num_collections = 20
        rows = gen_entities(row_count)

        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)

        for collection_name in collection_list:
            ids = connect.insert(collection_name, rows)
            status = connect.delete_entity_by_id(collection_name, ids[:row_count // 2])
            assert status.OK()
            # Flush before compacting, as the binary variant of this test does.
            connect.flush([collection_name])
            status = connect.compact(collection_name)
            assert status.OK()

    @pytest.mark.timeout(COMPACT_TIMEOUT)
341
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: inserting still works after a compact
        method: insert entities, flush, compact, then insert one more entity
        expected: compact leaves data_size unchanged and the entity count
                  ends at nb + 1
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        size_before = first_segment_size()
        compact_status = connect.compact(collection)
        assert compact_status.OK()
        size_after = first_segment_size()
        assert size_before == size_after

        # One more entity on top of the compacted collection.
        connect.insert(collection, entity)
        connect.flush([collection])
        total = connect.count_entities(collection)
        assert total == nb + 1
363 364

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
365
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: an index can be created after a compact
        method: insert entities, delete the first 10 ids, flush, compact,
                then create an index on the vector field
        expected: delete, compact and create_index all return an OK status
        '''
        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])

        delete_status = connect.delete_entity_by_id(collection, inserted_ids[:10])
        assert delete_status.OK()
        connect.flush([collection])

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        index_status = connect.create_index(collection, field_name, get_simple_index)
        assert index_status.OK()
381 382

    @pytest.mark.timeout(COMPACT_TIMEOUT)
383
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: deleting still works after a compact
        method: insert entities, flush, compact, flush, then delete every id
        expected: compact and delete succeed and the entity count drops to 0
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        compact_status = connect.compact(collection)
        assert compact_status.OK()
        connect.flush([collection])

        delete_status = connect.delete_entity_by_id(collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([collection])

        remaining = connect.count_entities(collection)
        assert remaining == 0
399 400

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
401
    def test_search_after_compact(self, connect, collection):
        '''
        target: searching still works after a compact
        method: insert entities, flush, compact, then search with three
                vectors: one taken from the single `entity` fixture and two
                taken from the inserted `entities`
        expected: one result per query vector; the first top distance is
                  above epsilon, the other two are below it
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        # The first vector comes from `entity`, which this test never inserts;
        # the other two come from the inserted `entities`.
        query_vectors = [
            entity[-1]["values"][0],
            entities[-1]["values"][0],
            entities[-1]["values"][-1],
        ]
        search_query = copy.deepcopy(default_single_query)
        search_query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors

        results = connect.search(collection, search_query)
        logging.getLogger().debug(results)

        assert len(results) == len(query_vectors)
        assert results[0]._distances[0] > epsilon
        assert results[1]._distances[0] < epsilon
        assert results[2]._distances[0] < epsilon
421

422 423
    # TODO: enable
    def _test_compact_server_crashed_recovery(self, connect, collection):
        '''
        target: compact recovers when the server crashes and restarts mid-run
        method: insert vectors, delete 1000 of them, compact; the server is
                meant to be stopped and restarted while compact runs
        expected: every status is OK and the row count reflects the deletion
        '''
        # NOTE(review): this disabled test unpacks (status, result) tuples from
        # insert / get_collection_stats, unlike the enabled tests in this
        # class -- presumably written against an older client; confirm before
        # enabling.
        total_rows = nb * 100
        removed_rows = 1000

        vectors = gen_vectors(total_rows, dim)
        status, inserted_ids = connect.insert(collection, vectors)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()

        status = connect.delete_entity_by_id(collection, inserted_ids[:removed_rows])
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()

        # Compact starts here; the server kill/restart is done out of band.
        logging.getLogger().info("compact starting...")
        status = connect.compact(collection)
        assert status.OK()

        # Collection info after compact.
        status, stats = connect.get_collection_stats(collection)
        assert status.OK()
        assert stats["partitions"][0].count == total_rows - removed_rows
448 449


D
del-zhenwu 已提交
450
class TestCompactBinary:
451 452 453 454 455 456
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
457
    def test_add_entity_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection holding a single entity
        method: insert one binary entity, flush, compact
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(binary_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([binary_collection])

        size_before = first_segment_size()
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        size_after = first_segment_size()

        assert size_before == size_after
475

476
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
477
    def test_insert_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection holding a batch of entities
        method: insert the shared binary entities, flush, compact
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])

        size_before = first_segment_size()
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        size_after = first_segment_size()

        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
497
    def test_insert_delete_part_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection after deleting a few entities
        method: insert binary entities, delete the first and last id, flush,
                compact
        expected: compact succeeds and data_size does not grow
        '''
        log = logging.getLogger()

        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])

        doomed = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(binary_collection, doomed)
        assert delete_status.OK()
        connect.flush([binary_collection])

        # Segment size before compaction.
        stats_before = connect.get_collection_stats(binary_collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)

        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()

        # Segment size after compaction.
        stats_after = connect.get_collection_stats(binary_collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)

        assert size_before >= size_after
523
    
524 525 526
    # TODO
    @pytest.mark.skip("not implement")
    @pytest.mark.level(2)
527
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
528
    def test_insert_delete_all_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection after deleting every entity
        method: insert binary entities, delete all of them, flush, compact
        expected: compact succeeds and the first partition reports no segments
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])

        status = connect.delete_entity_by_id(binary_collection, ids)
        assert status.OK()
        connect.flush([binary_collection])

        # No stats fetch before compact: nothing is compared against the
        # pre-compact state in this test.
        status = connect.compact(binary_collection)
        assert status.OK()

        # Collection info after compact. The stats call returns the info dict
        # only, so there is no new status to assert on here.
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]
549

550
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
551
    def test_add_entity_and_compact_twice(self, connect, binary_collection):
        '''
        target: compact the same single-entity binary collection twice
        method: insert one binary entity, flush, compact, compact again
        expected: both compacts succeed and data_size never changes
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(binary_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([binary_collection])
        size_before = first_segment_size()

        # First compaction.
        first_status = connect.compact(binary_collection)
        assert first_status.OK()
        size_after = first_segment_size()
        assert size_before == size_after

        # Second compaction.
        second_status = connect.compact(binary_collection)
        assert second_status.OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
577
    def test_insert_delete_part_and_compact_twice(self, connect, binary_collection):
        '''
        target: compact a binary collection twice after deleting a few entities
        method: insert binary entities, delete the first and last id, flush,
                compact, then compact again
        expected: both compacts succeed; data_size does not grow on the first
                  and is unchanged by the second
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])

        doomed = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(binary_collection, doomed)
        assert delete_status.OK()
        connect.flush([binary_collection])
        size_before = first_segment_size()

        # First compaction.
        first_status = connect.compact(binary_collection)
        assert first_status.OK()
        size_after = first_segment_size()
        assert size_before >= size_after

        # Second compaction.
        second_status = connect.compact(binary_collection)
        assert second_status.OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
X
Xiaohai Xu 已提交
607
    def test_compact_multi_collections(self, connect):
        '''
        target: compact works across multiple binary collections
        method: create 10 collections; for each one insert binary entities,
                delete the first and last id, flush, compact and drop it
        expected: every delete, compact and drop returns an OK status
        '''
        row_count = 100
        num_collections = 10
        # Only the second value returned by gen_binary_entities is inserted.
        _unused, binary_rows = gen_binary_entities(row_count)

        collection_list = []
        for idx in range(num_collections):
            name = gen_unique_str("test_compact_multi_collection_%d" % idx)
            collection_list.append(name)
            connect.create_collection(name, default_binary_fields)

        for name in collection_list:
            inserted_ids = connect.insert(name, binary_rows)
            assert len(inserted_ids) == row_count

            delete_status = connect.delete_entity_by_id(
                name, [inserted_ids[0], inserted_ids[-1]])
            assert delete_status.OK()
            connect.flush([name])

            compact_status = connect.compact(name)
            assert compact_status.OK()

            drop_status = connect.drop_collection(name)
            assert drop_status.OK()
631

632
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, binary_collection):
        '''
        target: test add entity after compact
        method: insert and flush entities, compact the collection, then insert
                one more entity and flush again
        expected: compact status ok, the first segment's data size is the same
                  before and after compact, and the entity count is nb + 1
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])

        # Measure the segment on either side of the compact call.
        size_before = first_segment_size()
        status = connect.compact(binary_collection)
        assert status.OK()
        size_after = first_segment_size()
        assert size_before == size_after

        # The collection must still accept new data after compaction.
        connect.insert(binary_collection, binary_entity)
        connect.flush([binary_collection])
        assert connect.count_entities(binary_collection) == nb + 1
655 656

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_delete_entities_after_compact(self, connect, binary_collection):
        '''
        target: test delete entities after compact
        method: insert and flush entities, compact, then delete every inserted
                id and flush
        expected: compact and delete both report an OK status and the
                  collection ends up with 0 entities
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])

        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        connect.flush([binary_collection])

        # Remove everything that was inserted above.
        delete_status = connect.delete_entity_by_id(binary_collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([binary_collection])

        remaining = connect.count_entities(binary_collection)
        assert remaining == 0
673

674
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, binary_collection):
        '''
        target: test search after compact
        method: insert and flush binary entities, compact, then search with the
                first and last stored vectors as query vectors
        expected: compact status ok; the top distance of the first query
                  matches, within epsilon, the jaccard distance of the first
                  raw vector to itself
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        status = connect.compact(binary_collection)
        assert status.OK()

        # Reference value: distance of the first raw vector against itself.
        first_raw = raw_vectors[0]
        expected_distance = jaccard(first_raw, first_raw)

        # Work on a copy so the shared module-level query stays untouched.
        query = copy.deepcopy(default_binary_single_query)
        stored_values = binary_entities[-1]["values"]
        vector_clause = query["bool"]["must"][0]["vector"][binary_field_name]
        vector_clause["query"] = [stored_values[0], stored_values[-1]]

        res = connect.search(binary_collection, query)
        assert abs(res[0]._distances[0] - expected_distance) <= epsilon
695

696
    # TODO:
697
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search after compact, using the IP metric
        method: insert and flush entities, compact, then search with three
                query vectors: one from the single-entity fixture and the first
                and last vectors of the inserted entities
        expected: one result per query vector; the top distance of the first
                  result is below 1 - epsilon and that of the other two is
                  above 1 - epsilon
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        # NOTE(review): the compact status is not asserted here, unlike in the
        # sibling tests - confirm whether that is intentional.
        status = connect.compact(collection)

        query = ip_query()
        stored_values = entities[-1]["values"]
        query["bool"]["must"][0]["vector"][field_name]["query"] = [
            entity[-1]["values"][0],
            stored_values[0],
            stored_values[-1],
        ]

        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query["bool"]["must"][0]["vector"][field_name]["query"])
        assert res[0]._distances[0] < 1 - epsilon
        for position in (1, 2):
            assert res[position]._distances[0] > 1 - epsilon