diff --git a/core/src/dog_segment/Collection.cpp b/core/src/dog_segment/Collection.cpp index 56905031b49fc89910352a22d4095450e81a285e..d97c0fe92b7ad3d3aef81710e85426581f7324c6 100644 --- a/core/src/dog_segment/Collection.cpp +++ b/core/src/dog_segment/Collection.cpp @@ -31,7 +31,7 @@ Collection::parse() { } auto schema = std::make_shared<Schema>(); for (const milvus::grpc::FieldMeta & child: collection.schema().field_metas()){ - std::cout<<"add Field, name :" << child.field_name() << std::endl; + std::cout<<"add Field, name :" << child.field_name() << ", datatype :" << child.type() << ", dim :" << int(child.dim()) << std::endl; schema->AddField(std::string_view(child.field_name()), DataType {child.type()}, int(child.dim())); } /* diff --git a/core/src/dog_segment/SegmentDefs.h b/core/src/dog_segment/SegmentDefs.h index 97ae5784c13ed4a238eb5e4c598e6ed857674520..e986c661580925fdbe69e5f8a0d0d7c2a1fdb6d4 100644 --- a/core/src/dog_segment/SegmentDefs.h +++ b/core/src/dog_segment/SegmentDefs.h @@ -161,6 +161,10 @@ class Schema { } } + const std::vector<FieldMeta>& get_fields() { + return fields_; + } + const FieldMeta& operator[](const std::string& field_name) const { auto offset_iter = offsets_.find(field_name); diff --git a/core/src/dog_segment/SegmentNaive.cpp b/core/src/dog_segment/SegmentNaive.cpp index a5bbdcd8e01f42ac64b540f06f1f6c95b8a4a297..235732813b5dd02779adc87587375a2fa9d28ad2 100644 --- a/core/src/dog_segment/SegmentNaive.cpp +++ b/core/src/dog_segment/SegmentNaive.cpp @@ -19,8 +19,20 @@ TestABI() { std::unique_ptr<SegmentBase> CreateSegment(SchemaPtr schema, IndexMetaPtr remote_index_meta) { if (remote_index_meta == nullptr) { + int dim = 0; + std::string index_field_name; + + for (auto& field: schema->get_fields()) { + if (field.get_data_type() == DataType::VECTOR_FLOAT) { + dim = field.get_dim(); + index_field_name = field.get_name(); + } + } + + assert(dim != 0); + assert(!index_field_name.empty()); + auto index_meta = std::make_shared<IndexMeta>(schema); - auto dim = 
schema->operator[]("fakevec").get_dim(); // TODO: this is merge of query conf and insert conf // TODO: should be splitted into multiple configs auto conf = milvus::knowhere::Config{ @@ -32,7 +44,7 @@ CreateSegment(SchemaPtr schema, IndexMetaPtr remote_index_meta) { {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, {milvus::knowhere::meta::DEVICEID, 0}, }; - index_meta->AddEntry("fakeindex", "fakevec", knowhere::IndexEnum::INDEX_FAISS_IVFPQ, + index_meta->AddEntry("fakeindex", index_field_name, knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::IndexMode::MODE_CPU, conf); remote_index_meta = index_meta; } @@ -141,7 +153,11 @@ Status SegmentNaive::Insert(int64_t reserved_begin, int64_t size, const int64_t *uids_raw, const Timestamp *timestamps_raw, const DogDataChunk &entities_raw) { assert(entities_raw.count == size); - assert(entities_raw.sizeof_per_row == schema_->get_total_sizeof()); + if (entities_raw.sizeof_per_row != schema_->get_total_sizeof()) { + std::string msg = "entity length = " + std::to_string(entities_raw.sizeof_per_row) + + ", schema length = " + std::to_string(schema_->get_total_sizeof()); + throw std::runtime_error(msg); + } auto raw_data = reinterpret_cast<const char*>(entities_raw.raw_data); // std::vector<char> entities(raw_data, raw_data + size * len_per_row); diff --git a/reader/read_node/index_test.go b/reader/read_node/index_test.go index d90d6c477c7a93689a2c2d5cfb3224a7bbc3ef7a..a6f9abe340a7d8101ccdf0d6884fe26466626bb5 100644 --- a/reader/read_node/index_test.go +++ b/reader/read_node/index_test.go @@ -13,7 +13,7 @@ import ( func TestIndex_BuildIndex(t *testing.T) { // 1. 
Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) diff --git a/reader/read_node/result_test.go b/reader/read_node/result_test.go index e93676df9f3253cf58942af68b907d10864ab74f..d82f3559fcb0be858822a195a474be1043533b8a 100644 --- a/reader/read_node/result_test.go +++ b/reader/read_node/result_test.go @@ -10,7 +10,7 @@ import ( func TestResult_PublishSearchResult(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment @@ -34,7 +34,7 @@ func TestResult_PublishSearchResult(t *testing.T) { func TestResult_PublishFailedSearchResult(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment @@ -46,7 +46,7 @@ func TestResult_PublishFailedSearchResult(t *testing.T) { func TestResult_PublicStatistic(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment diff --git a/reader/read_node/segment.go b/reader/read_node/segment.go index 
9bd85bd94205b44ff347752edb1f2204eec668c7..bfcdcf75433ba151cd7844b7b1f83f0dd54450ef 100644 --- a/reader/read_node/segment.go +++ b/reader/read_node/segment.go @@ -77,7 +77,7 @@ func (s *Segment) Close() error { } // Build index after closing segment - go s.buildIndex() + // go s.buildIndex() return nil } diff --git a/reader/read_node/segment_service_test.go b/reader/read_node/segment_service_test.go index 6acfed663d2005dc537b37a4df0cbe9fa98a06f3..ae0590a86710d144e3b5e606b5adb5b569ce2f14 100644 --- a/reader/read_node/segment_service_test.go +++ b/reader/read_node/segment_service_test.go @@ -7,7 +7,7 @@ import ( func TestSegmentManagement_SegmentsManagement(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment @@ -19,7 +19,7 @@ func TestSegmentManagement_SegmentsManagement(t *testing.T) { func TestSegmentManagement_SegmentService(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment @@ -31,7 +31,7 @@ func TestSegmentManagement_SegmentService(t *testing.T) { func TestSegmentManagement_SegmentStatistic(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment @@ -43,7 +43,7 @@ func 
TestSegmentManagement_SegmentStatistic(t *testing.T) { func TestSegmentManagement_SegmentStatisticService(t *testing.T) { // Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment diff --git a/reader/read_node/segment_test.go b/reader/read_node/segment_test.go index e06d3a85c5b95a3faffa9835be7f18b1e6b44d93..f9fe967755c99e6d71e6c181a28fab1f6d793777 100644 --- a/reader/read_node/segment_test.go +++ b/reader/read_node/segment_test.go @@ -13,7 +13,7 @@ import ( func TestSegment_ConstructorAndDestructor(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -26,7 +26,7 @@ func TestSegment_ConstructorAndDestructor(t *testing.T) { func TestSegment_SegmentInsert(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -71,7 +71,7 @@ func TestSegment_SegmentInsert(t *testing.T) { func TestSegment_SegmentDelete(t *testing.T) { // 1. 
Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -96,7 +96,7 @@ func TestSegment_SegmentDelete(t *testing.T) { func TestSegment_SegmentSearch(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -156,7 +156,7 @@ func TestSegment_SegmentSearch(t *testing.T) { func TestSegment_SegmentPreInsert(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -173,7 +173,7 @@ func TestSegment_SegmentPreInsert(t *testing.T) { func TestSegment_SegmentPreDelete(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -192,7 +192,7 @@ func TestSegment_SegmentPreDelete(t *testing.T) { func TestSegment_GetStatus(t *testing.T) { // 1. 
Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -209,7 +209,7 @@ func TestSegment_GetStatus(t *testing.T) { func TestSegment_Close(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -226,7 +226,7 @@ func TestSegment_Close(t *testing.T) { func TestSegment_GetRowCount(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -275,7 +275,7 @@ func TestSegment_GetRowCount(t *testing.T) { func TestSegment_GetDeletedCount(t *testing.T) { // 1. Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -305,7 +305,7 @@ func TestSegment_GetDeletedCount(t *testing.T) { func TestSegment_GetMemSize(t *testing.T) { // 1. 
Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) @@ -350,3 +350,51 @@ func TestSegment_GetMemSize(t *testing.T) { collection.DeletePartition(partition) node.DeleteCollection(collection) } + +func TestSegment_RealSchemaTest(t *testing.T) { + // 1. Construct node, collection, partition and segment + // var schemaString = "id: 6873737669791618215\nname: \"collection0\"\nschema: \u003c\n field_metas: \u003c\n field_name: \"field_1\"\n type: INT64\n \u003e\n field_metas: \u003c\n field_name: \"field_2\"\n type: FLOAT\n \u003e\n field_metas: \u003c\n field_name: \"field_3\"\n type: INT32\n \u003e\n field_metas: \u003c\n field_name: \"field_vec\"\n type: VECTOR_FLOAT\n \u003e\n\u003e\ncreate_time: 1600416765\nsegment_ids: 6873737669791618215\npartition_tags: \"default\"\n" + // var schemaString = "id: 6873737669791618215\nname: \"collection0\"\nschema: \u003c\n field_metas: \u003c\n field_name: \"age\"\n type: INT32\n \u003e\n field_metas: \u003c\n field_name: \"fakevec\"\n type: VECTOR_FLOAT\n \u003e\n\u003e\ncreate_time: 1600416765\nsegment_ids: 6873737669791618215\npartition_tags: \"default\"\n" + var schemaString = "id: 6873737669791618215\nname: \"collection0\"\nschema: \u003c\n field_metas: \u003c\n field_name: \"age\"\n type: INT32\n dim: 1\n \u003e\n field_metas: \u003c\n field_name: \"field_1\"\n type: VECTOR_FLOAT\n dim: 16\n \u003e\n\u003e\ncreate_time: 1600416765\nsegment_ids: 6873737669791618215\npartition_tags: \"default\"\n" + node := NewQueryNode(0, 0) + var collection = node.NewCollection(0, "collection0", schemaString) + var partition = collection.NewPartition("partition0") + var segment = partition.NewSegment(0) + + // 2. 
Create ids and timestamps + ids := []int64{1, 2, 3} + timestamps := []uint64{0, 0, 0} + + // 3. Create records, use schema below: + // schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16); + // schema_tmp->AddField("age", DataType::INT32); + const DIM = 16 + const N = 3 + var vec = [DIM]float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + var rawData []byte + for _, ele := range vec { + buf := make([]byte, 4) + binary.LittleEndian.PutUint32(buf, math.Float32bits(ele)) + rawData = append(rawData, buf...) + } + bs := make([]byte, 4) + binary.LittleEndian.PutUint32(bs, 1) + rawData = append(rawData, bs...) + var records [][]byte + for i := 0; i < N; i++ { + records = append(records, rawData) + } + + // 4. Do PreInsert + var offset = segment.SegmentPreInsert(N) + assert.GreaterOrEqual(t, offset, int64(0)) + + // 5. Do Insert + var err = segment.SegmentInsert(offset, &ids, &timestamps, &records) + assert.NoError(t, err) + + // 6. Destruct node, collection, and segment + partition.DeleteSegment(segment) + collection.DeletePartition(partition) + node.DeleteCollection(collection) +} diff --git a/reader/read_node/util_functions_test.go b/reader/read_node/util_functions_test.go index cc98e0a1a369e67718d6fd8c15af7a18a3bd1399..7f0ae295e44f838163e1d14041c54b835ede67be 100644 --- a/reader/read_node/util_functions_test.go +++ b/reader/read_node/util_functions_test.go @@ -13,7 +13,7 @@ func TestUtilFunctions_GetKey2Segments(t *testing.T) { func TestUtilFunctions_GetCollectionByCollectionName(t *testing.T) { // 1. Construct node, and collections node := NewQueryNode(0, 0) - var _ = node.NewCollection(0, "collection0", "fake schema") + var _ = node.NewCollection(0, "collection0", "") // 2. Get collection by collectionName var c0, err = node.GetCollectionByCollectionName("collection0") @@ -27,7 +27,7 @@ func TestUtilFunctions_GetCollectionByCollectionName(t *testing.T) { func TestUtilFunctions_GetSegmentBySegmentID(t *testing.T) { // 1. 
Construct node, collection, partition and segment node := NewQueryNode(0, 0) - var collection = node.NewCollection(0, "collection0", "fake schema") + var collection = node.NewCollection(0, "collection0", "") var partition = collection.NewPartition("partition0") var segment = partition.NewSegment(0) node.SegmentsMap[0] = segment