# test_list_id_in_segment.py
import time
import random
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from utils import *

# Module-level settings and pre-generated data shared by the tests below.
dim = 128
segment_row_count = 100000
# Default number of entities generated for the bulk data sets below.
nb = 6000
# Partition tag passed as `partition_tag` by the partition tests.
tag = "1970-01-01"
# Field and index names passed to `connect.create_index`.
field_name = "float_vector"
default_index_name = "list_index"
# Prefix handed to `gen_unique_str` to build non-existent names.
collection_id = "list_id_in_segment"
# Single-entity and `nb`-entity data sets (float and binary variants).
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()


def get_segment_name(connect, collection, nb=1, vec_type='float', index_params=None):
    """Insert `nb` entities and return their ids with the first segment's name.

    Entities are generated with `gen_entities` when `vec_type` is "float" and
    with `gen_binary_entities` otherwise. They are inserted into `collection`
    and flushed; when `index_params` is truthy an index is then created on
    `field_name` under `default_index_name`.

    :param connect: client object used for insert/flush/create_index/stats
    :param collection: name of the collection to insert into
    :param nb: number of entities to generate and insert
    :param vec_type: "float" for float vectors, anything else for binary
    :param index_params: optional index parameters; no index is built if falsy
    :return: tuple `(ids, segment_name)` where `segment_name` is the name of
        the first segment of the first partition reported by the stats
    """
    if vec_type != "float":
        # Only the entities are inserted; the raw vectors are not needed here.
        _, entities = gen_binary_entities(nb)
    else:
        entities = gen_entities(nb)
    ids = connect.insert(collection, entities)
    connect.flush([collection])
    if index_params:
        connect.create_index(collection, field_name, default_index_name, index_params)
    stats = connect.get_collection_stats(collection)
    return ids, stats["partitions"][0]["segments"][0]["name"]


class TestGetVectorIdsBase:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
    ******************************************************************
    """

    def test_list_id_in_segment_collection_name_None(self, connect, collection):
        """
        target: get vector ids where collection name is None
        method: call list_id_in_segment with the collection_name: None
        expected: exception raised
        """
        collection_name = None
        _, name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, name)

    def test_list_id_in_segment_collection_name_not_existed(self, connect, collection):
        """
        target: get vector ids where collection name does not exist
        method: call list_id_in_segment with a random collection_name, which is not in db
        expected: exception raised
        """
        collection_name = gen_unique_str(collection_id)
        _, name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, name)

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        """Yield each invalid string produced by `gen_invalid_strs`."""
        yield request.param

    def test_list_id_in_segment_collection_name_invalid(self, connect, collection, get_collection_name):
        """
        target: get vector ids where collection name is invalid
        method: call list_id_in_segment with invalid collection_name
        expected: exception raised
        """
        collection_name = get_collection_name
        _, name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, name)

    def test_list_id_in_segment_name_None(self, connect, collection):
        """
        target: get vector ids where segment name is None
        method: call list_id_in_segment with the name: None
        expected: exception raised
        """
        # Populate the collection so a valid segment exists alongside the bad name.
        get_segment_name(connect, collection)
        segment = None
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, segment)

    def test_list_id_in_segment_name_not_existed(self, connect, collection):
        """
        target: get vector ids where segment name does not exist
        method: call list_id_in_segment with a random segment name
        expected: exception raised
        """
        # Populate the collection so a valid segment exists alongside the bad name.
        get_segment_name(connect, collection)
        segment = gen_unique_str(collection_id)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, segment)

    def test_list_id_in_segment_without_index_A(self, connect, collection):
        """
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        nb = 1
        ids, name = get_segment_name(connect, collection, nb=nb)
        vector_ids = connect.list_id_in_segment(collection, name)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        assert vector_ids[0] == ids[0]

    def test_list_id_in_segment_without_index_B(self, connect, collection):
        """
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        nb = 10
        entities = gen_entities(nb)
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        partition = stats["partitions"][1]
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        """Return each index param set, skipping those listed by `index_cpu_not_support` in CPU mode."""
        cpu_mode = str(connect._cmd("mode")) == "CPU"
        if cpu_mode and request.param["index_type"] in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

    def test_list_id_in_segment_with_index_A(self, connect, collection, get_simple_index):
        """
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        ids, name = get_segment_name(connect, collection, nb=nb, index_params=get_simple_index)
        vector_ids = connect.list_id_in_segment(collection, name)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_with_index_B(self, connect, collection, get_simple_index):
        """
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        # Build the index so the test actually covers the indexed case.
        connect.create_index(collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(collection)
        partition = stats["partitions"][1]
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == len(ids)
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_after_delete_vectors(self, connect, collection):
        """
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: status ok, vector_ids decreased after vectors deleted
        """
        nb = 2
        ids, _ = get_segment_name(connect, collection, nb=nb)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        # Re-read the stats after the delete and use the segment name reported there.
        stats = connect.get_collection_stats(collection)
        vector_ids = connect.list_id_in_segment(collection, stats["partitions"][0]["segments"][0]["name"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]


class TestGetVectorIdsIP:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
    ******************************************************************
    """

    def test_list_id_in_segment_without_index_A(self, connect, ip_collection):
        """
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        nb = 10
        entities = gen_entities(nb)
        ids = connect.insert(ip_collection, entities)
        connect.flush([ip_collection])
        stats = connect.get_collection_stats(ip_collection)
        vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][0]["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_without_index_B(self, connect, ip_collection):
        """
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        connect.create_partition(ip_collection, tag)
        nb = 10
        entities = gen_entities(nb)
        ids = connect.insert(ip_collection, entities, partition_tag=tag)
        connect.flush([ip_collection])
        stats = connect.get_collection_stats(ip_collection)
        partition = stats["partitions"][1]
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(ip_collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        """Return each index param set, skipping those listed by `index_cpu_not_support` in CPU mode."""
        cpu_mode = str(connect._cmd("mode")) == "CPU"
        if cpu_mode and request.param["index_type"] in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

    def test_list_id_in_segment_with_index_A(self, connect, ip_collection, get_simple_index):
        """
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        ids, name = get_segment_name(connect, ip_collection, nb=nb, index_params=get_simple_index)
        vector_ids = connect.list_id_in_segment(ip_collection, name)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_with_index_B(self, connect, ip_collection, get_simple_index):
        """
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        connect.create_partition(ip_collection, tag)
        ids = connect.insert(ip_collection, entities, partition_tag=tag)
        connect.flush([ip_collection])
        # Build the index so the test actually covers the indexed case.
        connect.create_index(ip_collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(ip_collection)
        partition = stats["partitions"][1]
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(ip_collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == len(ids)
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_after_delete_vectors(self, connect, ip_collection):
        """
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: status ok, vector_ids decreased after vectors deleted
        """
        nb = 2
        ids, _ = get_segment_name(connect, ip_collection, nb=nb)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(ip_collection, delete_ids)
        connect.flush([ip_collection])
        # Re-read the stats after the delete and use the segment name reported there.
        stats = connect.get_collection_stats(ip_collection)
        vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][0]["segments"][0]["name"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]


class TestGetVectorIdsJAC:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
    ******************************************************************
    """

    def test_list_id_in_segment_without_index_A(self, connect, jac_collection):
        """
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(jac_collection, entities)
        connect.flush([jac_collection])
        stats = connect.get_collection_stats(jac_collection)
        vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_without_index_B(self, connect, jac_collection):
        """
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        connect.create_partition(jac_collection, tag)
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(jac_collection, entities, partition_tag=tag)
        connect.flush([jac_collection])
        stats = connect.get_collection_stats(jac_collection)
        partition = stats["partitions"][1]
        # Same partition sanity check as the sibling partition tests in this file.
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(jac_collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        """Return each index param set whose index_type is in `binary_support()`; skip the rest."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] not in binary_support():
            pytest.skip("not support")
        return request.param

    def test_list_id_in_segment_with_index_A(self, connect, jac_collection, get_jaccard_index):
        """
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        """
        ids, name = get_segment_name(connect, jac_collection, nb=nb, index_params=get_jaccard_index, vec_type='binary')
        vector_ids = connect.list_id_in_segment(jac_collection, name)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_with_index_B(self, connect, jac_collection, get_jaccard_index):
        """
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        """
        connect.create_partition(jac_collection, tag)
        # Insert the binary data set: the float `entities` do not belong in this collection.
        ids = connect.insert(jac_collection, binary_entities, partition_tag=tag)
        connect.flush([jac_collection])
        # Build the index so the test actually covers the indexed case.
        connect.create_index(jac_collection, field_name, default_index_name, get_jaccard_index)
        stats = connect.get_collection_stats(jac_collection)
        partition = stats["partitions"][1]
        assert partition["tag"] == tag
        vector_ids = connect.list_id_in_segment(jac_collection, partition["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == len(ids)
        for i, vector_id in enumerate(vector_ids):
            assert vector_id == ids[i]

    def test_list_id_in_segment_after_delete_vectors(self, connect, jac_collection, get_jaccard_index):
        """
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: status ok, vector_ids decreased after vectors deleted
        """
        nb = 2
        ids, _ = get_segment_name(connect, jac_collection, nb=nb, vec_type='binary', index_params=get_jaccard_index)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(jac_collection, delete_ids)
        connect.flush([jac_collection])
        # Re-read the stats after the delete and use the segment name reported there.
        stats = connect.get_collection_stats(jac_collection)
        vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["name"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]