# test_compact.py
import time
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from utils import *

# Shared parameters for every compact test in this module.
dim = 128
index_file_size = 10
COMPACT_TIMEOUT = 180  # handed to pytest.mark.timeout on the tests below
nprobe = 1
top_k = 1
tag = "1970-01-01"
nb = 6000
nq = 2
segment_row_count = 5000

# Data sets and schemas built once at import time and reused by the tests.
entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()
default_binary_fields = gen_binary_default_fields()
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name

# Query template with a single L2 vector clause; tests deep-copy it before editing.
default_single_query = {
    "bool": {
        "must": [
            {"vector": {field_name: {"topk": 10, "query": gen_vectors(1, dim), "metric_type": "L2",
                                     "params": {"nprobe": 10}}}}
        ]
    }
}
default_query, default_query_vecs = gen_query_vectors(binary_field_name, binary_entities, top_k, nq)


def ip_query():
    '''
    Build a query equal to default_single_query except that metric_type is "IP".

    The template is deep-copied, so the module-level dict is left untouched.
    '''
    ip_variant = copy.deepcopy(default_single_query)
    vector_clause = ip_variant["bool"]["must"][0]["vector"][field_name]
    vector_clause["metric_type"] = "IP"
    return ip_variant

41 42 43 44 45 46 47 48

class TestCompactBase:
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact collection where collection name is None
        method: compact with the collection_name: None
        expected: exception raised
        '''
        collection_name = None
        # only the fact that compact raises matters; nothing is bound from it
        with pytest.raises(Exception):
            connect.compact(collection_name)

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact collection not existed
        method: compact with a random collection_name, which is not in db
        expected: exception raised
        '''
        collection_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(collection_name)
    
    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        '''
        Parametrized fixture: yields, one per test run, each value returned by
        gen_invalid_strs().
        '''
        yield request.param

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact collection with invalid name
        method: compact with invalid collection_name
        expected: exception raised
        '''
        collection_name = get_collection_name
        with pytest.raises(Exception):
            connect.compact(collection_name)

    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: test add entity and compact
        method: add entity and compact collection
        expected: status ok, data_size is the same before and after compact
        '''
        ids = connect.insert(collection, entity)
        assert len(ids) == 1
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        # nothing was deleted, so compact must leave the segment size alone
        assert size_before == size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, collection):
        '''
        target: test add entities and compact
        method: add entities and compact collection
        expected: status ok, data_size is the same before and after compact
        '''
        connect.insert(collection, entities)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact
        method: add entities, delete a few and compact collection
        expected: status ok, data size does not grow after compact
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        # delete the first and the last inserted entity
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        log = logging.getLogger()
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)
        assert size_before >= size_after
    
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip("not implemented")
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: test add entities, delete them and compact
        method: add entities, delete all and compact collection
        expected: status ok, no segments left in collection info because collection is empty
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids)
        assert status.OK()
        connect.flush([collection])
        # stats are still requested before compact, but the result is not needed
        connect.get_collection_stats(collection)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip("not implemented")
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: test add entities into partition, delete half of them and compact
        method: add entities, delete half of entities in partition and compact collection
        expected: status ok, data_size less than the older version
        '''
        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])

        # delete the first half of what was inserted, independent of the value of nb
        delete_ids = ids[:len(ids) // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info_after = connect.get_collection_stats(collection)
        logging.getLogger().info(info_after["partitions"])
        # NOTE(review): index 1 is presumably the partition created above - confirm
        size_before = info["partitions"][1]["segments"][0]["data_size"]
        size_after = info_after["partitions"][1]["segments"][0]["data_size"]
        assert size_before > size_after

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Parametrized fixture: returns each index config from gen_simple_index(),
        skipping the ones the server mode reported by connect._cmd("mode")
        cannot build.

        The GPU branch used to read "not ... not in ivf()", which skipped exactly
        the index types listed by ivf(); it now skips the types that are NOT
        listed, which is what the skip message says.
        '''
        mode = str(connect._cmd("mode"))
        index_type = request.param["index_type"]
        if mode == "GPU" and index_type not in ivf():
            pytest.skip("Only support index_type: idmap/ivf")
        if mode == "CPU" and index_type in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

    # TODO
    @pytest.mark.skip('not implemented')
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test compact collection after index created
        method: add entities, create index, delete part of entities and compact
        expected: status ok, data size does not grow after compact
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        logging.getLogger().info(info["partitions"])
        # delete the first and the last inserted entity
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: test add entity and compact twice
        method: add entity and compact collection twice
        expected: status ok, data size no change
        '''
        connect.insert(collection, entity)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(collection)
        assert status.OK()
        connect.flush([collection])
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(collection)
        size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: add entities, delete part and compact collection twice
        expected: status ok, data size not larger after first compact, no change after second
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        # delete the first and the last inserted entity
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(collection)
        size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
        assert size_after == size_after_twice

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 50 collections, add entities into them and compact in turn
        expected: status ok
        '''
        # local names chosen so they do not shadow the module-level nq / entities
        entity_count = 100
        num_collections = 50
        batch = gen_entities(entity_count)
        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
        time.sleep(6)
        for collection_name in collection_list:
            connect.insert(collection_name, batch)
            status = connect.compact(collection_name)
            assert status.OK()

    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: test add entity after compact
        method: after compact operation, add entity
        expected: status ok, entity added
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
        # one more entity on top of the nb already stored
        connect.insert(collection, entity)
        connect.flush([collection])
        res = connect.count_entities(collection)
        assert res == nb + 1

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: test index creation after compact
        method: insert, delete the first 10 entities, compact, then create index
        expected: status ok for delete, compact and create_index
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids[:10])
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        status = connect.create_index(collection, field_name, get_simple_index)
        assert status.OK()
        # TODO: also verify the index description; that check was left disabled here

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: test delete entities after compact
        method: after compact operation, delete entities
        expected: status ok, entities deleted
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids)
        assert status.OK()
        connect.flush([collection])
        # every inserted id was deleted, so nothing may be left
        assert connect.count_entities(collection) == 0

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, collection):
        '''
        target: test search after compact
        method: after compact operation, search vector
        expected: status ok, one result per query vector; the two vectors taken
                  from the inserted entities match within epsilon, the other does not
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        # NOTE(review): the last field of entity/entities is presumably the vector field - confirm
        query_vectors = [
            entity[-1]["values"][0],      # from the single-entity set, not inserted here
            entities[-1]["values"][0],    # first inserted vector
            entities[-1]["values"][-1],   # last inserted vector
        ]
        query = copy.deepcopy(default_single_query)
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().debug(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] > epsilon
        assert res[1]._distances[0] < epsilon
        assert res[2]._distances[0] < epsilon

    # TODO: enable
    def _test_compact_server_crashed_recovery(self, connect, collection):
        '''
        target: test compact when server crashed unexpectedly and restarted
        method: add entities, delete and compact collection; server stopped and restarted during compact
        expected: status ok, request recovered

        Disabled: the leading underscore keeps pytest from collecting it.
        NOTE(review): unlike the other tests in this class, this body unpacks
        (status, result) tuples from insert/get_collection_stats and checks the
        status of flush - looks like an older client API; confirm before enabling.
        '''
        entities = gen_vectors(nb * 100, dim)
        status, ids = connect.insert(collection, entities)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        delete_ids = ids[0:1000]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        # start to compact, kill and restart server
        logging.getLogger().info("compact starting...")
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        status, info = connect.get_collection_stats(collection)
        assert status.OK()
        # nb * 100 inserted, 1000 deleted
        assert info["partitions"][0].count == nb * 100 - 1000


class TestCompactBinary:
    """
    ******************************************************************
      The following cases are used to test `compact` function
      on collections with binary vectors
    ******************************************************************
    """
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    # TODO
    @pytest.mark.level(2)
    def test_add_entity_and_compact(self, connect, binary_collection):
        '''
        target: test add binary vector and compact
        method: add vector and compact collection
        expected: status ok, vector added, data_size unchanged by compact
        '''
        ids = connect.insert(binary_collection, binary_entity)
        assert len(ids) == 1
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, binary_collection):
        '''
        target: test add entities with binary vector and compact
        method: add entities and compact collection
        expected: status ok, entities added, data_size unchanged by compact
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact
        method: add entities, delete a few and compact collection
        expected: status ok, data size does not grow after compact
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        # delete the first and the last inserted entity
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(binary_collection, delete_ids)
        assert status.OK()
        connect.flush([binary_collection])
        log = logging.getLogger()
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        log.info(info["partitions"])
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        log.info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)
        assert size_before >= size_after
    
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip('not implemented')
    def test_insert_delete_all_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete them and compact
        method: add entities, delete all and compact collection
        expected: status ok, no segments left in collection info because collection is empty
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        status = connect.delete_entity_by_id(binary_collection, ids)
        assert status.OK()
        connect.flush([binary_collection])
        # stats are still requested before compact, but the result is not needed
        connect.get_collection_stats(binary_collection)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entity and compact twice
        method: add entity and compact collection twice
        expected: status ok, data size no change
        '''
        ids = connect.insert(binary_collection, binary_entity)
        assert len(ids) == 1
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(binary_collection)
        size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_delete_part_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: add entities, delete part and compact collection twice
        expected: status ok, data size not larger after first compact, no change after second
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        # delete the first and the last inserted entity
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(binary_collection, delete_ids)
        assert status.OK()
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(binary_collection)
        size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 10 collections, add entities into them, delete two, compact and drop in turn
        expected: status ok
        '''
        # local names chosen so they do not shadow the module-level nq / entities
        entity_count = 100
        num_collections = 10
        _, batch = gen_binary_entities(entity_count)
        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_binary_fields)
        for collection_name in collection_list:
            ids = connect.insert(collection_name, batch)
            assert len(ids) == entity_count
            # delete the first and the last inserted entity
            status = connect.delete_entity_by_id(collection_name, [ids[0], ids[-1]])
            assert status.OK()
            connect.flush([collection_name])
            status = connect.compact(collection_name)
            assert status.OK()
            status = connect.drop_collection(collection_name)
            assert status.OK()

    @pytest.mark.level(2)
637
    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
638
    def test_add_entity_after_compact(self, connect, binary_collection):
        '''
        target: check that an entity can still be inserted after compact
        method: insert nb binary entities, flush, compact (nothing was deleted),
                then insert one more entity and flush
        expected: compact status ok, data_size of the first segment unchanged by
                  compact, entity count equals nb + 1
        '''
        def first_segment_data_size():
            # data_size of the first segment in the first partition
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        # size reported before compact
        size_before = first_segment_data_size()
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # size reported after compact
        size_after = first_segment_data_size()
        assert size_before == size_after
        connect.insert(binary_collection, binary_entity)
        connect.flush([binary_collection])
        total = connect.count_entities(binary_collection)
        assert total == nb + 1
659 660

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
661
    def test_delete_entities_after_compact(self, connect, binary_collection):
        '''
        target: check that entities can be deleted after compact
        method: insert binary entities, flush, compact, flush again, then delete
                every inserted id and flush
        expected: compact and delete statuses ok, entity count drops to 0
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        connect.flush([binary_collection])
        delete_status = connect.delete_entity_by_id(binary_collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([binary_collection])
        assert connect.count_entities(binary_collection) == 0
677 678

    @pytest.mark.timeout(COMPACT_TIMEOUT)
D
del-zhenwu 已提交
679
    def test_search_after_compact(self, connect, binary_collection):
        '''
        target: check that search works after compact
        method: insert nb binary entities, flush, compact, then search with a copy
                of the module-level default_query
        expected: compact status ok; the top distance of the first result is within
                  epsilon of the jaccard distance of raw_vectors[0] to itself
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        # Reference distance: the first raw vector compared against itself.
        reference_vec = raw_vectors[0]
        expected_distance = jaccard(reference_vec, raw_vectors[0])
        search_query = copy.deepcopy(default_query)
        hits = connect.search(binary_collection, search_query)

        top_distance = hits[0]._distances[0]
        assert abs(top_distance-expected_distance) <= epsilon
696

697
    # TODO:
698
    @pytest.mark.timeout(COMPACT_TIMEOUT)
699
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search with the IP metric after compact
        method: insert nb entities, flush, compact, then search with three query
                vectors: one taken from the module-level single `entity` and two
                taken from the last field of the inserted `entities`
        expected: compact status ok; one result per query vector; the top distance
                  is below 1 - epsilon for the first query and above 1 - epsilon
                  for the other two
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        # Fail here rather than in the search assertions if compaction itself failed.
        assert status.OK()
        query = ip_query()
        # NOTE(review): the thresholds below presumably rely on generated vectors
        # being normalized (self inner product ~ 1) - confirm against gen_entities.
        query_vectors = [entity[-1]["values"][0], entities[-1]["values"][0],
                         entities[-1]["values"][-1]]
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] < 1 - epsilon
        assert res[1]._distances[0] > 1 - epsilon
        assert res[2]._distances[0] > 1 - epsilon