Commit 7f1501a8 authored by: F fishpenguin

Merge branch 'scalar-field-filtering' of github.com:milvus-io/milvus into scalar-field-filtering

import pdb
import pytest
import logging
import itertools
from time import sleep
from multiprocessing import Process
from milvus import IndexType, MetricType
from utils import *
dim = 128
default_segment_size = 1024
drop_collection_interval_time = 3
segment_size = 10
vectors = gen_vectors(100, dim)
default_fields = gen_default_fields()
def create_collection(connect, **params):
connect.create_collection(params["collection_name"], default_fields)
def search_collection(connect, **params):
status, result = connect.search(
params["collection_name"],
params["top_k"],
params["query_vectors"],
params={"nprobe": params["nprobe"]})
return status
def load_collection(connect, **params):
connect.load_collection(params["collection_name"])
def has(connect, **params):
status, result = connect.has_collection(params["collection_name"])
return status
def show(connect, **params):
status, result = connect.list_collections()
return status
def delete(connect, **params):
status = connect.drop_collection(params["collection_name"])
return status
def describe(connect, **params):
status, result = connect.get_collection_info(params["collection_name"])
return status
def rowcount(connect, **params):
status, result = connect.count_entities(params["collection_name"])
return status
def create_index(connect, **params):
status = connect.create_index(params["collection_name"], params["index_type"], params["index_param"])
return status
func_map = {
# 0:has,
1:show,
10:create_collection,
11:describe,
12:rowcount,
13:search_collection,
14:load_collection,
15:create_index,
30:delete
}
def gen_sequence():
raw_seq = func_map.keys()
result = itertools.permutations(raw_seq)
for x in result:
yield x
class TestCollectionLogic(object):
@pytest.mark.parametrize("logic_seq", gen_sequence())
@pytest.mark.level(2)
def _test_logic(self, connect, logic_seq, args):
if args["handler"] == "HTTP":
pytest.skip("Skip in http mode")
if self.is_right(logic_seq):
self.execute(logic_seq, connect)
else:
self.execute_with_error(logic_seq, connect)
self.tear_down(connect)
    def is_right(self, seq):
        # a permutation already in ascending key order is always valid
        if sorted(seq) == list(seq):
            return True
        not_created = True
        has_deleted = False
        for i in range(len(seq)):
            if seq[i] > 10 and not_created:
                return False
            elif seq[i] > 10 and has_deleted:
                return False
            elif seq[i] == 10:
                not_created = False
            elif seq[i] == 30:
                has_deleted = True
        return True
def execute(self, logic_seq, connect):
basic_params = self.gen_params()
for i in range(len(logic_seq)):
# logging.getLogger().info(logic_seq[i])
f = func_map[logic_seq[i]]
status = f(connect, **basic_params)
assert status.OK()
def execute_with_error(self, logic_seq, connect):
basic_params = self.gen_params()
error_flag = False
for i in range(len(logic_seq)):
f = func_map[logic_seq[i]]
status = f(connect, **basic_params)
if not status.OK():
# logging.getLogger().info(logic_seq[i])
error_flag = True
break
assert error_flag == True
def tear_down(self, connect):
names = connect.list_collections()[1]
for name in names:
connect.drop_collection(name)
def gen_params(self):
collection_name = gen_unique_str("test_collection")
top_k = 1
vectors = gen_vectors(2, dim)
param = {'collection_name': collection_name,
'dimension': dim,
'metric_type': MetricType.L2,
'nprobe': 1,
'top_k': top_k,
'index_type': IndexType.IVF_SQ8,
'index_param': {
'nlist': 16384
},
'query_vectors': vectors}
return param
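The validity rule `is_right` encodes is that every operation keyed above 10 needs the collection to exist, so key 10 (create) must come before them and key 30 (delete) must come after. A minimal illustration of how two candidate permutations classify under that rule (the sample sequences are illustrative, not taken from a test run):

# Illustrative only: classify two candidate operation sequences.
# Keys: 1=show, 10=create, 11..15=ops needing the collection, 30=delete.
valid_seq = (1, 10, 11, 12, 13, 14, 15, 30)    # create before ops, delete last
invalid_seq = (11, 10, 12, 13, 14, 15, 30, 1)  # op 11 runs before create (10)
checker = TestCollectionLogic()
assert checker.is_right(valid_seq)
assert not checker.is_right(invalid_seq)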
import pdb
import pytest
import copy
import logging
import itertools
from time import sleep
from multiprocessing import Process
import pytest
from milvus import IndexType, MetricType
from utils import *
......@@ -15,7 +17,7 @@ vectors = gen_vectors(100, dim)
default_fields = gen_default_fields()
class TestCollection:
class TestCreateCollection:
"""
******************************************************************
......@@ -104,6 +106,27 @@ class TestCollection:
finally:
enable_flush(connect)
def test_create_collection_after_insert(self, connect, collection):
'''
target: test insert vector, then create collection again
method: insert vector and create collection
expected: error raised
'''
connect.insert(collection, entities)
with pytest.raises(Exception) as e:
connect.create_collection(collection, default_fields)
def test_create_collection_after_insert_flush(self, connect, collection):
'''
        target: test insert vector and flush, then create collection again
        method: insert vector, flush, and create collection with the same name
        expected: error raised
'''
connect.insert(collection, entities)
connect.flush([collection])
with pytest.raises(Exception) as e:
connect.create_collection(collection, default_fields)
# TODO: assert exception
@pytest.mark.level(2)
def test_create_collection_without_connection(self, dis_connect):
......@@ -182,11 +205,17 @@ class TestCreateCollectionInvalid(object):
@pytest.fixture(
scope="function",
params=gen_invalid_collection_names()
        params=gen_invalid_strs()
)
def get_collection_name(self, request):
def get_invalid_string(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_field_types()
)
def get_field_type(self, request):
yield request.param
@pytest.mark.level(2)
def test_create_collection_with_invalid_segment_size(self, connect, get_segment_size):
......@@ -209,12 +238,12 @@ class TestCreateCollectionInvalid(object):
dimension = get_dim
collection_name = gen_unique_str()
fields = copy.deepcopy(default_fields)
fields["fields"][-1]["dimension"] = dimension
fields["fields"][-1]["extra_params"]["dimension"] = dimension
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
@pytest.mark.level(2)
def test_create_collection_with_invalid_collectionname(self, connect, get_collection_name):
def test_create_collection_with_invalid_collectionname(self, connect, get_invalid_string):
        collection_name = get_invalid_string
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, default_fields)
......@@ -248,7 +277,7 @@ class TestCreateCollectionInvalid(object):
'''
collection_name = gen_unique_str("test_collection")
fields = copy.deepcopy(default_fields)
fields["fields"][-1].pop("dimension")
fields["fields"][-1]["extra_params"].pop("dimension")
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
......@@ -279,3 +308,35 @@ class TestCreateCollectionInvalid(object):
res = connect.get_collection_info(collection_name)
logging.getLogger().info(res)
# assert result.metric_type == MetricType.L2
# TODO: assert exception
def test_create_collection_limit_fields(self, connect):
collection_name = gen_unique_str("test_collection")
limit_num = 64
fields = copy.deepcopy(default_fields)
for i in range(limit_num):
field_name = gen_unique_str("field_name")
field = {"field": field_name, "type": DataType.INT8}
fields["fields"].append(field)
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
# TODO: assert exception
def test_create_collection_invalid_field_name(self, connect, get_invalid_string):
collection_name = gen_unique_str("test_collection")
fields = copy.deepcopy(default_fields)
field_name = get_invalid_string
field = {"field": field_name, "type": DataType.INT8}
fields["fields"].append(field)
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
# TODO: assert exception
def test_create_collection_invalid_field_type(self, connect, get_field_type):
collection_name = gen_unique_str("test_collection")
fields = copy.deepcopy(default_fields)
field_type = get_field_type
field = {"field": "test_field", "type": field_type}
fields["fields"].append(field)
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
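For reference, the schema change exercised above moves `dimension` off the top level of a vector field and into `extra_params` beside `metric_type`. A minimal sketch of a schema in the new shape (the field names and the 128-dim value here are illustrative):

# Hypothetical schema in the post-change layout: dimension under extra_params.
example_fields = {
    "fields": [
        {"field": "int64", "type": DataType.INT64},
        {"field": "float_vector", "type": DataType.FLOAT_VECTOR,
         "extra_params": {"metric_type": MetricType.L2, "dimension": 128}},
    ],
    "segment_size": 10,
}
# connect.create_collection(gen_unique_str("test_collection"), example_fields)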
import pdb
import pytest
import logging
import itertools
from time import sleep
from multiprocessing import Process
from milvus import IndexType, MetricType
from utils import *
uniq_id = "test_drop_collection"
default_fields = gen_default_fields()
class TestDropCollection:
"""
******************************************************************
The following cases are used to test `drop_collection` function
******************************************************************
"""
def test_drop_collection(self, connect, collection):
'''
target: test delete collection created with correct params
method: create collection and then delete,
assert the value returned by delete method
expected: status ok, and no collection in collections
'''
connect.drop_collection(collection)
assert not assert_has_collection(connect, collection)
@pytest.mark.level(2)
def test_drop_collection_without_connection(self, collection, dis_connect):
'''
        target: test drop collection, without connection
method: drop collection with correct params, with a disconnected instance
expected: drop raise exception
'''
with pytest.raises(Exception) as e:
dis_connect.drop_collection(collection)
def test_drop_collection_not_existed(self, connect):
'''
        target: test dropping a collection that was never created
        method: generate a random collection name that does not exist in db,
                assert that drop_collection raises an exception
        expected: exception raised
'''
collection_name = gen_unique_str(uniq_id)
with pytest.raises(Exception) as e:
            connect.drop_collection(collection_name)
class TestDropCollectionInvalid(object):
"""
Test has collection with invalid params
"""
@pytest.fixture(
scope="function",
params=gen_invalid_collection_names()
)
def get_collection_name(self, request):
yield request.param
@pytest.mark.level(2)
def test_drop_collection_with_invalid_collectionname(self, connect, get_collection_name):
collection_name = get_collection_name
with pytest.raises(Exception) as e:
            connect.drop_collection(collection_name)
@pytest.mark.level(2)
def test_drop_collection_with_empty_collectionname(self, connect):
collection_name = ''
with pytest.raises(Exception) as e:
            connect.drop_collection(collection_name)
@pytest.mark.level(2)
def test_drop_collection_with_none_collectionname(self, connect):
collection_name = None
with pytest.raises(Exception) as e:
            connect.drop_collection(collection_name)
import pdb
import pytest
import logging
import itertools
from time import sleep
from multiprocessing import Process
from milvus import IndexType, MetricType
from utils import *
uniq_id = "test_load_collection"
index_name = "load_index_name"
default_fields = gen_default_fields()
entities = gen_entities(6000)
class TestLoadCollection:
"""
******************************************************************
The following cases are used to test `load_collection` function
******************************************************************
"""
@pytest.fixture(
scope="function",
params=gen_simple_index()
)
def get_simple_index(self, request, connect):
if str(connect._cmd("mode")[1]) == "CPU":
if request.param["index_type"] == IndexType.IVF_SQ8H:
pytest.skip("sq8h not support in cpu mode")
if request.param["index_type"] == IndexType.IVF_PQ:
pytest.skip("Skip PQ Temporary")
return request.param
def test_load_collection_after_index(self, connect, collection, get_simple_index):
'''
target: test load collection, after index created
method: insert and create index, load collection with correct params
        expected: load collection success
'''
connect.insert(collection, entities)
connect.flush([collection])
field_name = "fload_vector"
connect.create_index(collection, field_name, index_name, get_simple_index)
connect.load_collection(collection)
def load_empty_collection(self, connect, collection):
'''
target: test load collection
method: no entities in collection, load collection with correct params
expected: load success
'''
connect.load_collection(collection)
@pytest.mark.level(1)
def test_load_collection_dis_connect(self, dis_connect, collection):
'''
target: test load collection, without connection
method: load collection with correct params, with a disconnected instance
expected: load raise exception
'''
with pytest.raises(Exception) as e:
dis_connect.load_collection(collection)
@pytest.mark.level(2)
def test_load_collection_not_existed(self, connect, collection):
collection_name = gen_unique_str()
with pytest.raises(Exception) as e:
connect.load_collection(collection_name)
class TestLoadCollectionInvalid(object):
"""
Test load collection with invalid params
"""
@pytest.fixture(
scope="function",
params=gen_invalid_collection_names()
)
def get_collection_name(self, request):
yield request.param
@pytest.mark.level(2)
def test_load_collection_with_invalid_collectionname(self, connect, get_collection_name):
collection_name = get_collection_name
with pytest.raises(Exception) as e:
            connect.load_collection(collection_name)
@pytest.mark.level(2)
def test_load_collection_with_empty_collectionname(self, connect):
collection_name = ''
with pytest.raises(Exception) as e:
            connect.load_collection(collection_name)
@pytest.mark.level(2)
def test_load_collection_with_none_collectionname(self, connect):
collection_name = None
with pytest.raises(Exception) as e:
            connect.load_collection(collection_name)
import time
import pdb
import struct
from random import sample
import pytest
import copy
import threading
import datetime
import logging
from time import sleep
from multiprocessing import Process
import numpy
import sklearn.preprocessing
from multiprocessing import Pool, Process
import pytest
from milvus import IndexType, MetricType
from utils import *
dim = 128
collection_id = "test_search"
add_interval_time = 2
vectors = gen_vectors(6000, dim)
vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2')
vectors = vectors.tolist()
segment_size = 10
collection_id = "test_insert"
ADD_TIMEOUT = 60
tag = "1970-01-01"
insert_interval_time = 1.5
nb = 6000
top_k = 1
nprobe = 1
epsilon = 0.001
tag = "1970-01-01"
raw_vectors, binary_vectors = gen_binary_vectors(6000, dim)
field_name = "float_vector"
default_index_name = "insert_index"
entity = gen_entities(1, is_normal=True)
binary_entity = gen_binary_entities(1)
entities = gen_entities(nb, is_normal=True)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_single_query = {
"bool": {
"must": [
{"vector": {field_name: {"topk": 10, "query": entity, "params": {"index_name": default_index_name, "nprobe": 10}}}}
]
}
}
query = {
    "bool": {
        "must": [
            {"term": {"A": {"values": [1, 2, 5]}}},
            {"range": {"B": {"ranges": {"GT": 1, "LT": 100}}}},
            {"vector": {"Vec": {"topk": 10, "query": vectors[:1], "params": {"index_name": IndexType.IVF_FLAT, "nprobe": 10}}}}
        ],
    },
}
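The `query` constant above combines three `must` clauses: a term filter on field A, a range filter on field B, and a top-10 vector clause on field Vec. A hedged helper that assembles the same structure (the helper itself is illustrative, not part of the test suite):

def build_hybrid_query(term_values, gt, lt, query_vectors, topk=10):
    # Illustrative builder for the bool/must structure shown above.
    return {
        "bool": {
            "must": [
                {"term": {"A": {"values": term_values}}},
                {"range": {"B": {"ranges": {"GT": gt, "LT": lt}}}},
                {"vector": {"Vec": {"topk": topk, "query": query_vectors,
                                    "params": {"nprobe": 10}}}},
            ],
        },
    }

# e.g. build_hybrid_query([1, 2, 5], 1, 100, vectors[:1])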
class TestSearchBase:
def init_data(self, connect, collection, nb=6000, partition_tags=None):
'''
Generate vectors and add it in collection, before search vectors
Generate entities and add it in collection
'''
global vectors
global entities
if nb == 6000:
add_vectors = vectors
insert_entities = entities
else:
add_vectors = gen_vectors(nb, dim)
add_vectors = sklearn.preprocessing.normalize(add_vectors, axis=1, norm='l2')
add_vectors = add_vectors.tolist()
insert_entities = gen_entities(nb, is_normal=True)
if partition_tags is None:
status, ids = connect.insert(collection, add_vectors)
assert status.OK()
ids = connect.insert(collection, insert_entities)
else:
status, ids = connect.insert(collection, add_vectors, partition_tag=partition_tags)
assert status.OK()
ids = connect.insert(collection, insert_entities, partition_tag=partition_tags)
connect.flush([collection])
return add_vectors, ids
return insert_entities, ids
def init_binary_data(self, connect, collection, nb=6000, insert=True, partition_tags=None):
'''
Generate vectors and add it in collection, before search vectors
Generate entities and add it in collection
'''
ids = []
global binary_vectors
global binary_entities
global raw_vectors
if nb == 6000:
add_vectors = binary_vectors
add_raw_vectors = raw_vectors
insert_entities = binary_entities
insert_raw_vectors = raw_vectors
else:
add_raw_vectors, add_vectors = gen_binary_vectors(nb, dim)
insert_raw_vectors, insert_entities = gen_binary_entities(nb)
if insert is True:
if partition_tags is None:
status, ids = connect.insert(collection, add_vectors)
assert status.OK()
                ids = connect.insert(collection, insert_entities)
else:
status, ids = connect.insert(collection, add_vectors, partition_tag=partition_tags)
assert status.OK()
                ids = connect.insert(collection, insert_entities, partition_tag=partition_tags)
connect.flush([collection])
return add_raw_vectors, add_vectors, ids
return insert_raw_vectors, insert_entities, ids
"""
generate valid create_index params
......
......@@ -29,6 +29,36 @@ all_index_types = [
]
def jaccard(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
return 1 - np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())
def hamming(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
return np.bitwise_xor(x, y).sum()
def tanimoto(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
def substructure(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y)
def superstructure(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
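Since jaccard returns 1 - |x∧y|/|x∨y| and tanimoto returns -log2(|x∧y|/|x∨y|), the two satisfy tanimoto = -log2(1 - jaccard). A quick sanity sketch on illustrative bit vectors:

# Sanity check (illustrative data): tanimoto == -log2(1 - jaccard).
_x = [1, 0, 1, 1, 0, 0, 1, 0]
_y = [1, 1, 0, 1, 0, 1, 1, 0]
assert abs(tanimoto(_x, _y) + np.log2(1 - jaccard(_x, _y))) < 1e-9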
def get_milvus(host, port, uri=None, handler=None, **kwargs):
if handler is None:
handler = "GRPC"
......@@ -58,8 +88,16 @@ def gen_inaccuracy(num):
return num / 255.0
def gen_vectors(num, dim):
return [[random.random() for _ in range(dim)] for _ in range(num)]
def gen_vectors(num, dim, is_normal=False):
vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2')
return vectors.tolist()
def gen_vectors(nb, d, seed=np.random.RandomState(1234), is_normal=False):
xb = seed.rand(nb, d).astype("float32")
    xb = sklearn.preprocessing.normalize(xb, axis=1, norm='l2')
return xb.tolist()
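Note that the default `seed` is a single `np.random.RandomState` created at definition time, so repeated calls with the default keep advancing the same stream; passing a fresh RandomState instance reproduces identical data. An illustrative check:

# Illustrative: explicit seeding makes generated test vectors reproducible.
a = gen_vectors(5, 8, seed=np.random.RandomState(1234))
b = gen_vectors(5, 8, seed=np.random.RandomState(1234))
assert a == b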
def gen_binary_vectors(num, dim):
......@@ -72,36 +110,6 @@ def gen_binary_vectors(num, dim):
return raw_vectors, binary_vectors
def gen_binary_sub_vectors(vectors, length):
raw_vectors = []
binary_vectors = []
......@@ -127,34 +135,21 @@ def gen_binary_super_vectors(vectors, length):
raw_vectors.append(raw_vector)
binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
return raw_vectors, binary_vectors
def gen_single_vector(dim):
return [[random.random() for _ in range(dim)]]
def gen_int_attr(row_num):
return [random.randint(0, 255) for _ in range(row_num)]
def gen_float_attr(row_num):
return [random.uniform(0, 255) for _ in range(row_num)]
def gen_vector(nb, d, seed=np.random.RandomState(1234)):
xb = seed.rand(nb, d).astype("float32")
return xb.tolist()
def gen_unique_str(str_value=None):
prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
return "test_" + prefix if str_value is None else str_value + "_" + prefix
def gen_long_str(num):
    long_str = ''
    for _ in range(num):
        long_str += random.choice('tomorrow')
    return long_str
def gen_single_filter_fields():
fields = []
for data_type in [i.value for i in DataType]:
......@@ -168,7 +163,7 @@ def gen_single_vector_fields():
for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
if metric_type in [MetricType.L2, MetricType.IP] and data_type == DataType.BINARY_VECTOR:
continue
field = {"field": data_type.name, "type": data_type, "dimension": dimension, "extra_params": {"metric_type": metric_type}}
field = {"field": data_type.name, "type": data_type, "extra_params": {"metric_type": metric_type, "dimension": dimension}}
fields.append(field)
return fields
......@@ -179,13 +174,98 @@ def gen_default_fields():
{"field": "int8", "type": DataType.INT8},
{"field": "int64", "type": DataType.INT64},
{"field": "float", "type": DataType.FLOAT},
{"field": "float_vector", "type": DataType.FLOAT_VECTOR, "dimension": dimension, "extra_params": {"metric_type": MetricType.L2}
{"field": "float_vector", "type": DataType.FLOAT_VECTOR, "extra_params": {"metric_type": MetricType.L2, "dimension": dimension}}
],
"segment_size": segment_size
}
return default_fields
def gen_entities(nb, is_normal=False):
vectors = gen_vectors(nb, dimension, is_normal)
entities = [
{"field": "int8", "type": DataType.INT8, "values": [1 for i in range(nb)]},
{"field": "int64", "type": DataType.INT64, "values": [2 for i in range(nb)]},
{"field": "float", "type": DataType.FLOAT, "values": [3.0 for i in range(nb)]},
{"field": "float_vector", "type": DataType.FLOAT_VECTOR, "values": vectors}
]
return entities
def gen_binary_entities(nb):
raw_vectors, vectors = gen_binary_vectors(nb, dimension)
entities = [
{"field": "int8", "type": DataType.INT8, "values": [1 for i in range(nb)]},
{"field": "int64", "type": DataType.INT64, "values": [2 for i in range(nb)]},
{"field": "float", "type": DataType.FLOAT, "values": [3.0 for i in range(nb)]},
{"field": "binary_vector", "type": DataType.BINARY_VECTOR, "values": vectors}
]
return raw_vectors, entities
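Both generators return a column-oriented payload: one dict per field, each holding `nb` values, with the vector column last. An illustrative shape check:

# Illustrative: every field column in an entities payload carries nb values.
sample = gen_entities(4)
for column in sample:
    assert len(column["values"]) == 4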
def add_field(entities):
field = {
"field": gen_unique_str(),
"type": DataType.INT8,
"values": [1 for i in range(nb)]
}
entities.append(field)
return entities
def add_vector_field(entities, is_normal=False):
vectors = gen_vectors(nb, dimension, is_normal)
field = {
"field": gen_unique_str(),
"type": DataType.FLOAT_VECTOR,
"values": vectors
}
entities.append(field)
return entities
def remove_field(entities):
del entities[0]
return entities
def remove_vector_field(entities):
del entities[-1]
return entities
def update_field_name(entities, old_name, new_name):
for item in entities:
if item["field"] == old_name:
item["field"] = new_name
return entities
def update_field_type(entities, old_type, new_type):
    for item in entities:
        if item["type"] == old_type:
            item["type"] = new_type
    return entities
def update_field_value(entities, old_type, new_value):
    for item in entities:
        if item["type"] == old_type:
            for i in range(len(item["values"])):
                item["values"][i] = new_value
    return entities
def add_float_vector_field(nb, dimension):
field_name = gen_unique_str()
field = {
"field": field_name,
"type": DataType.FLOAT_VECTOR,
"values": gen_vectors(nb, dimension)
}
return field_name
def gen_segment_sizes():
sizes = [
1,
......@@ -216,23 +296,6 @@ def gen_invalid_ips():
return ips
def gen_invalid_ports():
ports = [
# empty
" ",
-1,
# too big port
100000,
# not correct port
39540,
"BB。A",
" siede ",
"(mn)",
"中文"
]
return ports
def gen_invalid_uris():
ip = None
uris = [
......@@ -262,8 +325,11 @@ def gen_invalid_uris():
return uris
def gen_invalid_collection_names():
collection_names = [
def gen_invalid_strs():
strings = [
1,
[1],
None,
"12-s",
" ",
# "",
......@@ -278,65 +344,26 @@ def gen_invalid_collection_names():
"中文",
"a".join("a" for i in range(256))
]
return collection_names
return strings
def gen_invalid_ints():
    top_ks = [
        0,
        -1,
        None,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return top_ks

def gen_invalid_dims():
    dims = [
        0,
        -1,
        100001,
        1000000000000001,
        None,
        False,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return dims

def gen_invalid_field_types():
    field_types = [
        1,
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return field_types
def gen_invalid_file_sizes():
    file_sizes = [
        0,
        -1,
        1000000000000001,
    ]
    return file_sizes

def gen_invalid_index_types():
    invalid_types = [
        0,
        -1,
        100,
        1000000000000001,
        # None,
        False,
        [1,2,3],
        (1,2),
        {"a": 1},
        " ",
        "",
        "String",
        "12-s",
        "BB。A",
        " siede ",
        "(mn)",
        "pip+",
        "=c",
        "中文",
        "a".join("a" for i in range(256))
    ]
    return invalid_types

def gen_invalid_ints():
    top_ks = [
        1.0,
        None,
        False,
        "stringg",
        [1,2,3],
        (1,2),
        {"a": 1},
......@@ -352,33 +379,7 @@ def gen_invalid_file_sizes():
        "中文",
        "a".join("a" for i in range(256))
    ]
    return top_ks
def gen_invalid_params():
......@@ -403,52 +404,6 @@ def gen_invalid_params():
return params
def gen_invalid_nprobes():
nprobes = [
0,
-1,
1000000000000001,
None,
[1,2,3],
(1,2),
{"a": 1},
" ",
"",
"String",
"12-s",
"BB。A",
" siede ",
"(mn)",
"pip+",
"=c",
"中文"
]
return nprobes
def gen_invalid_metric_types():
metric_types = [
0,
-1,
1000000000000001,
# None,
[1,2,3],
(1,2),
{"a": 1},
" ",
"",
"String",
"12-s",
"BB。A",
" siede ",
"(mn)",
"pip+",
"=c",
"中文"
]
return metric_types
def gen_invalid_vectors():
invalid_vectors = [
"1*2",
......@@ -476,70 +431,6 @@ def gen_invalid_vectors():
return invalid_vectors
def gen_invalid_vector_ids():
invalid_vector_ids = [
1.0,
-1.0,
None,
# int 64
10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,
" ",
"",
"String",
"BB。A",
" siede ",
"(mn)",
"=c",
"中文",
]
return invalid_vector_ids
def gen_invalid_cache_config():
invalid_configs = [
0,
-1,
9223372036854775808,
[1,2,3],
(1,2),
{"a": 1},
" ",
"",
"String",
"12-s",
"BB。A",
" siede ",
"(mn)",
"pip+",
"=c",
"中文",
"'123'",
"さようなら"
]
return invalid_configs
def gen_invalid_gpu_config():
invalid_configs = [
-1,
[1,2,3],
(1,2),
{"a": 1},
" ",
"",
"String",
"12-s",
"BB。A",
" siede ",
"(mn)",
"pip+",
"=c",
"中文",
"'123'",
]
return invalid_configs
def gen_invaild_search_params():
invalid_search_key = 100
search_params = []
......@@ -572,39 +463,39 @@ def gen_invaild_search_params():
def gen_invalid_index():
index_params = []
for index_type in gen_invalid_index_types():
index_param = {"index_type": index_type, "index_param": {"nlist": 1024}}
index_param = {"index_type": index_type, "params": {"nlist": 1024}}
index_params.append(index_param)
for nlist in gen_invalid_params():
index_param = {"index_type": IndexType.IVFLAT, "index_param": {"nlist": nlist}}
index_param = {"index_type": IndexType.IVFLAT, "params": {"nlist": nlist}}
index_params.append(index_param)
for M in gen_invalid_params():
index_param = {"index_type": IndexType.HNSW, "index_param": {"M": M, "efConstruction": 100}}
index_param = {"index_type": IndexType.HNSW, "params": {"M": M, "efConstruction": 100}}
index_params.append(index_param)
for efConstruction in gen_invalid_params():
index_param = {"index_type": IndexType.HNSW, "index_param": {"M": 16, "efConstruction": efConstruction}}
index_param = {"index_type": IndexType.HNSW, "params": {"M": 16, "efConstruction": efConstruction}}
index_params.append(index_param)
for search_length in gen_invalid_params():
index_param = {"index_type": IndexType.RNSG,
"index_param": {"search_length": search_length, "out_degree": 40, "candidate_pool_size": 50,
"params": {"search_length": search_length, "out_degree": 40, "candidate_pool_size": 50,
"knng": 100}}
index_params.append(index_param)
for out_degree in gen_invalid_params():
index_param = {"index_type": IndexType.RNSG,
"index_param": {"search_length": 100, "out_degree": out_degree, "candidate_pool_size": 50,
"params": {"search_length": 100, "out_degree": out_degree, "candidate_pool_size": 50,
"knng": 100}}
index_params.append(index_param)
for candidate_pool_size in gen_invalid_params():
index_param = {"index_type": IndexType.RNSG, "index_param": {"search_length": 100, "out_degree": 40,
index_param = {"index_type": IndexType.RNSG, "params": {"search_length": 100, "out_degree": 40,
"candidate_pool_size": candidate_pool_size,
"knng": 100}}
index_params.append(index_param)
index_params.append({"index_type": IndexType.IVF_FLAT, "index_param": {"invalid_key": 1024}})
index_params.append({"index_type": IndexType.HNSW, "index_param": {"invalid_key": 16, "efConstruction": 100}})
index_params.append({"index_type": IndexType.IVF_FLAT, "params": {"invalid_key": 1024}})
index_params.append({"index_type": IndexType.HNSW, "params": {"invalid_key": 16, "efConstruction": 100}})
index_params.append({"index_type": IndexType.RNSG,
"index_param": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300,
"params": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300,
"knng": 100}})
for invalid_n_trees in gen_invalid_params():
index_params.append({"index_type": IndexType.ANNOY, "index_param": {"n_trees": invalid_n_trees}})
index_params.append({"index_type": IndexType.ANNOY, "params": {"n_trees": invalid_n_trees}})
return index_params
......@@ -663,7 +554,7 @@ def gen_simple_index():
]
index_params = []
for i in range(len(all_index_types)):
index_params.append({"index_type": all_index_types[i], "index_param": params[i]})
index_params.append({"index_type": all_index_types[i], "params": params[i]})
return index_params
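Each generated entry pairs an `index_type` with its matching `params` dict (renamed from `index_param` in this commit), so tests can feed them straight into index creation. A hedged usage sketch; the collection and field names below are placeholders:

# Illustrative: iterate generated index configs; names are placeholders.
for simple_index in gen_simple_index():
    print(simple_index["index_type"], simple_index["params"])
    # connect.create_index("test_collection", "float_vector",
    #                      "index_name", simple_index)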
......