提交 a48ca802 编写于 作者: F FluorineDog 提交者: yefu.chen

Format Code and duplicate class Segment

Signed-off-by: NFluorineDog <guilin.gou@zilliz.com>
上级 1b31b85e
# Formats, in place with clang-format, every C/C++ source found under
# <path_to_core>/src and <path_to_core>/unittest.
# Usage: <this script> <path_to_core>
if [ -z "$1" ]; then
    echo "usage: $0 <path_to_core>"
    # exit statuses are 0..255; a negative literal is rejected by some shells
    exit 1
else
    echo start formating
fi
CorePath=$1

# formatThis <dir>
# Runs `clang-format -i` on every regular file below <dir> whose name ends in
# .cpp, .h or .cc, skipping any path that contains "/thirdparty" or ".pb.".
formatThis() {
    # find does the filtering itself (no regex needed) and `-exec ... {} +`
    # passes file names safely even when they contain whitespace; clang-format
    # is not invoked at all when nothing matches.
    find "$1" -type f \
        \( -name '*.cpp' -o -name '*.h' -o -name '*.cc' \) \
        ! -path '*/thirdparty*' \
        ! -path '*.pb.*' \
        -exec clang-format -i {} +
}
formatThis "${CorePath}/src"
formatThis "${CorePath}/unittest"
......@@ -3,6 +3,7 @@ aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/../pb PB_SRC_FILES)
# add_definitions(-DBOOST_STACKTRACE_USE_ADDR2LINE)
set(DOG_SEGMENT_FILES
SegmentNaive.cpp
SegmentSmallIndex.cpp
IndexMeta.cpp
ConcurrentVector.cpp
Collection.cpp
......@@ -11,6 +12,7 @@ set(DOG_SEGMENT_FILES
partition_c.cpp
segment_c.cpp
EasyAssert.cpp
SegmentBase.cpp
${PB_SRC_FILES}
)
add_library(milvus_dog_segment SHARED
......
#include "Collection.h"
#include "pb/master.pb.h"
#include "pb/common.pb.h"
#include "pb/schema.pb.h"
#include "pb/etcd_meta.pb.h"
#include "pb/message.pb.h"
#include <google/protobuf/text_format.h>
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
......@@ -91,8 +93,8 @@ Collection::CreateIndex(std::string& index_config) {
return;
}
masterpb::Collection collection;
auto suc = google::protobuf::TextFormat::ParseFromString(index_config, &collection);
milvus::proto::etcd::CollectionMeta collection_meta;
auto suc = google::protobuf::TextFormat::ParseFromString(index_config, &collection_meta);
if (!suc) {
std::cerr << "unmarshal index string failed" << std::endl;
......@@ -100,11 +102,11 @@ Collection::CreateIndex(std::string& index_config) {
index_ = std::make_shared<IndexMeta>(schema_);
for (const auto& index : collection.indexes()) {
std::cout << "add index, index name =" << index.index_name() << ", field_name = " << index.field_name()
<< std::endl;
AddIndex(index);
}
// for (const auto& index : collection_meta.indexes()) {
// std::cout << "add index, index name =" << index.index_name() << ", field_name = " << index.field_name()
// << std::endl;
// AddIndex(index);
// }
}
void
......@@ -118,17 +120,24 @@ Collection::parse() {
return;
}
masterpb::Collection collection;
auto suc = google::protobuf::TextFormat::ParseFromString(schema_json_, &collection);
milvus::proto::etcd::CollectionMeta collection_meta;
auto suc = google::protobuf::TextFormat::ParseFromString(schema_json_, &collection_meta);
if (!suc) {
std::cerr << "unmarshal schema string failed" << std::endl;
}
auto schema = std::make_shared<Schema>();
for (const milvus::grpc::FieldMeta& child : collection.schema().field_metas()) {
std::cout << "add Field, name :" << child.field_name() << ", datatype :" << child.type()
<< ", dim :" << int(child.dim()) << std::endl;
schema->AddField(std::string_view(child.field_name()), DataType{child.type()}, int(child.dim()));
for (const milvus::proto::schema::FieldSchema& child : collection_meta.schema().fields()) {
const auto & type_params = child.type_params();
int dim = 16;
for (const auto & type_param: type_params){
if(type_param.key() == "dim"){
// dim = type_param.value();
}
}
std::cout << "add Field, name :" << child.name() << ", datatype :" << child.data_type()
<< ", dim :" << dim << std::endl;
schema->AddField(std::string_view(child.name()), DataType(child.data_type()), dim);
}
/*
schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
......
......@@ -50,7 +50,7 @@ struct DeletedRecord {
std::shared_mutex shared_mutex_;
};
auto
inline auto
DeletedRecord::TmpBitmap::clone(int64_t capacity) -> std::shared_ptr<TmpBitmap> {
auto res = std::make_shared<TmpBitmap>();
res->del_barrier = this->del_barrier;
......
#include "SegmentNaive.h"
#include "SegmentSmallIndex.h"
namespace milvus::dog_segment {
// seems to be deprecated
// Column-oriented copy of a DogDataChunk: one contiguous float vector per schema field.
struct ColumnBasedDataChunk {
    // entity_vecs[k] holds the values of the k-th schema field for all rows, row after row.
    std::vector<std::vector<float>> entity_vecs;

    // Splits the rows of `source` into one float column per field of `schema`.
    // Rows are read with a stride of source.sizeof_per_row bytes; every field size
    // must be a multiple of sizeof(float) (checked with Assert).
    // NOTE(review): assumes the fields of a row are packed back to back in schema order - confirm.
    static ColumnBasedDataChunk
    from(const DogDataChunk& source, const Schema& schema) {
        ColumnBasedDataChunk dest;
        auto count = source.count;
        // byte pointer to the current field inside the first row
        auto raw_data = reinterpret_cast<const char*>(source.raw_data);
        auto align = source.sizeof_per_row;
        for (auto& field : schema) {
            auto len = field.get_sizeof();
            Assert(len % sizeof(float) == 0);
            std::vector<float> new_col(len * count / sizeof(float));
            for (int64_t i = 0; i < count; ++i) {
                // destination is indexed in floats, source in bytes
                memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align, len);
            }
            dest.entity_vecs.push_back(std::move(new_col));
            // raw_data is a char pointer, so skip this field by its size in BYTES
            // (advancing by len / sizeof(float) would land inside the same field)
            raw_data += len;
        }
        return dest;
    }
};
// Returns the fixed value 42.
// NOTE(review): presumably a link/ABI smoke check for callers - inferred from the name only.
int
TestABI() {
    constexpr int kExpectedValue = 42;
    return kExpectedValue;
}
// Factory: constructs a SegmentSmallIndex from `schema` and returns it as an
// owning pointer to its SegmentBase interface.
std::unique_ptr<SegmentBase>
CreateSegment(SchemaPtr schema) {
    return std::make_unique<SegmentSmallIndex>(schema);
}
} // namespace milvus::dog_segment
......@@ -10,17 +10,6 @@
#include <faiss/utils/distances.h>
namespace milvus::dog_segment {
// Returns the fixed value 42.
// NOTE(review): presumably a link/ABI smoke check for callers - inferred from the name only.
int
TestABI() {
    constexpr int kExpectedValue = 42;
    return kExpectedValue;
}
// Factory: constructs a SegmentNaive from `schema` and returns it as an
// owning pointer to its SegmentBase interface.
std::unique_ptr<SegmentBase>
CreateSegment(SchemaPtr schema) {
    return std::make_unique<SegmentNaive>(schema);
}
SegmentNaive::Record::Record(const Schema& schema) : uids_(1), timestamps_(1) {
for (auto& field : schema) {
if (field.is_vector()) {
......@@ -317,7 +306,7 @@ SegmentNaive::QueryImpl(query::QueryPtr query_info, Timestamp timestamp, QueryRe
return Status::OK();
}
void
static void
merge_into(int64_t queries,
int64_t topk,
float* distances,
......
......@@ -17,30 +17,6 @@
#include "EasyAssert.h"
namespace milvus::dog_segment {
// Column-oriented copy of a DogDataChunk: one contiguous float vector per schema field.
struct ColumnBasedDataChunk {
    // entity_vecs[k] holds the values of the k-th schema field for all rows, row after row.
    std::vector<std::vector<float>> entity_vecs;

    // Splits the rows of `source` into one float column per field of `schema`.
    // Rows are read with a stride of source.sizeof_per_row bytes; every field size
    // must be a multiple of sizeof(float) (checked with Assert).
    // NOTE(review): assumes the fields of a row are packed back to back in schema order - confirm.
    static ColumnBasedDataChunk
    from(const DogDataChunk& source, const Schema& schema) {
        ColumnBasedDataChunk dest;
        auto count = source.count;
        // byte pointer to the current field inside the first row
        auto raw_data = reinterpret_cast<const char*>(source.raw_data);
        auto align = source.sizeof_per_row;
        for (auto& field : schema) {
            auto len = field.get_sizeof();
            Assert(len % sizeof(float) == 0);
            std::vector<float> new_col(len * count / sizeof(float));
            for (int64_t i = 0; i < count; ++i) {
                // destination is indexed in floats, source in bytes
                memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align, len);
            }
            dest.entity_vecs.push_back(std::move(new_col));
            // raw_data is a char pointer, so skip this field by its size in BYTES
            // (advancing by len / sizeof(float) would land inside the same field)
            raw_data += len;
        }
        return dest;
    }
};
class SegmentNaive : public SegmentBase {
public:
virtual ~SegmentNaive() = default;
......
此差异已折叠。
#pragma once
#include <tbb/concurrent_priority_queue.h>
#include <tbb/concurrent_unordered_map.h>
#include <tbb/concurrent_vector.h>
#include <shared_mutex>
#include <knowhere/index/vector_index/VecIndex.h>
#include "AckResponder.h"
#include "ConcurrentVector.h"
#include "dog_segment/SegmentBase.h"
// #include "knowhere/index/structured_index/StructuredIndex.h"
#include "query/GeneralQuery.h"
#include "utils/Status.h"
#include "dog_segment/DeletedRecord.h"
#include "EasyAssert.h"
namespace milvus::dog_segment {
// struct ColumnBasedDataChunk {
// std::vector<std::vector<float>> entity_vecs;
//
// static ColumnBasedDataChunk
// from(const DogDataChunk& source, const Schema& schema) {
// ColumnBasedDataChunk dest;
// auto count = source.count;
// auto raw_data = reinterpret_cast<const char*>(source.raw_data);
// auto align = source.sizeof_per_row;
// for (auto& field : schema) {
// auto len = field.get_sizeof();
// Assert(len % sizeof(float) == 0);
// std::vector<float> new_col(len * count / sizeof(float));
// for (int64_t i = 0; i < count; ++i) {
// memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align, len);
// }
// dest.entity_vecs.push_back(std::move(new_col));
// // offset the raw_data
// raw_data += len / sizeof(float);
// }
// return dest;
// }
//};
// Segment implementation deriving from SegmentBase. Every SegmentBase virtual
// declared here is marked `override`; members without an inline body are
// defined outside this header.
class SegmentSmallIndex : public SegmentBase {
public:
virtual ~SegmentSmallIndex() = default;
// SegmentBase(std::shared_ptr<FieldsInfo> collection);
// NOTE(review): presumably reserves room for `size` rows and returns the offset
// later passed to Insert as `reserverd_offset` - implementation not visible here, confirm.
int64_t
PreInsert(int64_t size) override;
// TODO: originally, id should be put into data_chunk
// TODO: Is it ok to put them the other side?
// Takes `size` rows: primary keys, timestamps and the row payload in `values`.
Status
Insert(int64_t reserverd_offset,
int64_t size,
const int64_t* primary_keys,
const Timestamp* timestamps,
const DogDataChunk& values) override;
// NOTE(review): presumably the delete-side counterpart of PreInsert - confirm.
int64_t
PreDelete(int64_t size) override;
// TODO: add id into delete log, possibly bitmap
Status
Delete(int64_t reserverd_offset, int64_t size, const int64_t* primary_keys, const Timestamp* timestamps) override;
// query_info carries the query description (original note was left unfinished: "query contains metadata of")
Status
Query(query::QueryPtr query_info, Timestamp timestamp, QueryResult& results) override;
// Stops receiving insert requests;
// will move data to an immutable vector or something similar.
Status
Close() override;
// using IndexType = knowhere::IndexType;
// using IndexMode = knowhere::IndexMode;
// using IndexConfig = knowhere::Config;
// BuildIndex with parameters; must be called in the Frozen state.
// NOTE: index_params contains several policies for several indexes
// TODO: currently, index has to be set at startup, and can't be modified
// AddIndex and DropIndex will be added later
Status
BuildIndex(IndexMetaPtr index_meta) override;
// Currently does nothing and reports success; `field_name` is ignored.
Status
DropRawData(std::string_view field_name) override {
// TODO: NO-OP
return Status::OK();
}
// Currently does nothing and reports success; all arguments are ignored.
Status
LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) override {
// TODO: NO-OP
return Status::OK();
}
int64_t
GetMemoryUsageInBytes() override;
public:
// Row count as reported by record_.ack_responder_.GetAck().
ssize_t
get_row_count() const override {
return record_.ack_responder_.GetAck();
}
// Current segment state, read with a relaxed atomic load.
SegmentState
get_state() const override {
return state_.load(std::memory_order_relaxed);
}
// Always returns 0 for now; deletions are not counted here.
ssize_t
get_deleted_count() const override {
return 0;
}
public:
// Lets the CreateSegment factory access this class's non-public members.
friend std::unique_ptr<SegmentBase>
CreateSegment(SchemaPtr schema);
// Keeps `schema` and builds record_ from the pointed-to Schema;
// `schema` is dereferenced here, so it must not be null.
explicit SegmentSmallIndex(SchemaPtr schema) : schema_(schema), record_(*schema) {
}
private:
// struct MutableRecord {
// ConcurrentVector<uint64_t> uids_;
// tbb::concurrent_vector<Timestamp> timestamps_;
// std::vector<tbb::concurrent_vector<float>> entity_vecs_;
//
// MutableRecord(int entity_size) : entity_vecs_(entity_size) {
// }
// };
// Storage for inserted rows: timestamps, uids and one type-erased vector per entry of entity_vec_.
struct Record {
// NOTE(review): presumably the number of row slots handed out so far - confirm against PreInsert.
std::atomic<int64_t> reserved = 0;
AckResponder ack_responder_;
ConcurrentVector<Timestamp, true> timestamps_;
ConcurrentVector<idx_t, true> uids_;
std::vector<std::shared_ptr<VectorBase>> entity_vec_;
Record(const Schema& schema);
// Returns entity_vec_[offset] viewed as ConcurrentVector<Type>.
// static_pointer_cast performs no runtime check, so `Type` must match the stored vector;
// `offset` is not bounds-checked.
template <typename Type>
auto
get_vec_entity(int offset) {
return std::static_pointer_cast<ConcurrentVector<Type>>(entity_vec_[offset]);
}
};
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier, Timestamp query_timestamp, int64_t insert_barrier, bool force = false);
// Query helpers taking the same query/timestamp/results triple as Query().
// NOTE(review): how Query() chooses between them is not visible in this header.
Status
QueryImpl(query::QueryPtr query, Timestamp timestamp, QueryResult& results);
Status
QuerySlowImpl(query::QueryPtr query, Timestamp timestamp, QueryResult& results);
Status
QueryBruteForceImpl(query::QueryPtr query, Timestamp timestamp, QueryResult& results);
template <typename Type>
knowhere::IndexPtr
BuildVecIndexImpl(const IndexMeta::Entry& entry);
private:
// Declaration order matters: the constructor initialises schema_ before record_.
SchemaPtr schema_;
std::atomic<SegmentState> state_ = SegmentState::Open;
Record record_;
DeletedRecord deleted_record_;
std::atomic<bool> index_ready_ = false;
IndexMetaPtr index_meta_;
std::unordered_map<std::string, knowhere::IndexPtr> indexings_; // index_name => indexing
// NOTE(review): presumably uid => row offset(s); a multimap, so one uid may map to several offsets.
tbb::concurrent_unordered_multimap<idx_t, int64_t> uid2offset_;
};
} // namespace milvus::dog_segment
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# TODO
# Source files compiled into the milvus_query library.
set(MILVUS_QUERY_SRCS
BinaryQuery.cpp
Parser.cpp
)
# No STATIC/SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
add_library(milvus_query ${MILVUS_QUERY_SRCS})
# milvus_query links against the libprotobuf target.
target_link_libraries(milvus_query libprotobuf)
......@@ -86,7 +86,7 @@ add_custom_command(TARGET generate_milvus_pb_grpc
POST_BUILD
COMMAND ${PROTOC_EXCUTABLE} -I "${PROTO_PATH}" --cpp_out "${PROTO_OUTPUT_PATH}"
"message.proto" "master.proto"
"common.proto" "schema.proto" "etcd_meta.proto"
DEPENDS "${PROTO_PATH}/*.proto"
)
......
此差异已折叠。
......@@ -3,7 +3,6 @@ package segment
import (
"time"
masterpb "github.com/zilliztech/milvus-distributed/internal/proto/master"
jsoniter "github.com/json-iterator/go"
)
......@@ -18,7 +17,6 @@ type Segment struct {
OpenTimeStamp uint64 `json:"open_timestamp"`
CloseTimeStamp uint64 `json:"close_timestamp"`
CollectionName string `json:"collection_name"`
Status masterpb.SegmentStatus `json:"segment_status"`
Rows int64 `json:"rows"`
}
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册