提交 dbe90c7a 编写于 作者: G groot

support table partition

上级 b4189371
......@@ -8,6 +8,7 @@ Please mark all change in change log and use the ticket from JIRA.
## Feature
- \#12 - Pure CPU version for Milvus
- \#77 - Support table partition
## Improvement
......
## Data Migration
#### 0.3.x
legacy data cannot be migrated to later versions
#### 0.4.x
legacy data can be reused directly by 0.5.x
legacy data can be migrated to 0.6.x
#### 0.5.x
legacy data can be migrated to 0.6.x
#### 0.6.x
how to migrate legacy 0.4.x/0.5.x data
for sqlite meta:
```shell
$ sqlite3 [path_to]/meta.sqlite < sqlite_4_to_6.sql
```
for mysql meta:
```shell
$ mysql -h127.0.0.1 -uroot -p123456 -Dmilvus < mysql_4_to_6.sql
```
alter table Tables add column owner_table VARCHAR(255) DEFAULT '' NOT NULL;
alter table Tables add column partition_tag VARCHAR(255) DEFAULT '' NOT NULL;
alter table Tables add column version VARCHAR(64) DEFAULT '0.6.0' NOT NULL;
update Tables set version='0.6.0';
alter table Tables add column 'owner_table' TEXT DEFAULT '' NOT NULL;
alter table Tables add column 'partition_tag' TEXT DEFAULT '' NOT NULL;
alter table Tables add column 'version' TEXT DEFAULT '0.6.0' NOT NULL;
update Tables set version='0.6.0';
......@@ -47,43 +47,68 @@ class DB {
virtual Status
CreateTable(meta::TableSchema& table_schema_) = 0;
virtual Status
DeleteTable(const std::string& table_id, const meta::DatesT& dates) = 0;
DropTable(const std::string& table_id, const meta::DatesT& dates) = 0;
virtual Status
DescribeTable(meta::TableSchema& table_schema_) = 0;
virtual Status
HasTable(const std::string& table_id, bool& has_or_not_) = 0;
virtual Status
AllTables(std::vector<meta::TableSchema>& table_schema_array) = 0;
virtual Status
GetTableRowCount(const std::string& table_id, uint64_t& row_count) = 0;
virtual Status
PreloadTable(const std::string& table_id) = 0;
virtual Status
UpdateTableFlag(const std::string& table_id, int64_t flag) = 0;
virtual Status
InsertVectors(const std::string& table_id_, uint64_t n, const float* vectors, IDNumbers& vector_ids_) = 0;
CreatePartition(const std::string& table_id, const std::string& partition_name,
const std::string& partition_tag) = 0;
virtual Status
DropPartition(const std::string& partition_name) = 0;
virtual Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
QueryResults& results) = 0;
DropPartitionByTag(const std::string& table_id, const std::string& partition_tag) = 0;
virtual Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
const meta::DatesT& dates, QueryResults& results) = 0;
ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) = 0;
virtual Status
Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0;
InsertVectors(const std::string& table_id, const std::string& partition_tag, uint64_t n, const float* vectors,
IDNumbers& vector_ids_) = 0;
virtual Status
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) = 0;
virtual Status
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) = 0;
virtual Status
QueryByFileID(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) = 0;
virtual Status
Size(uint64_t& result) = 0;
virtual Status
CreateIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status
DescribeIndex(const std::string& table_id, TableIndex& index) = 0;
virtual Status
DropIndex(const std::string& table_id) = 0;
......
此差异已折叠。
......@@ -57,7 +57,7 @@ class DBImpl : public DB {
CreateTable(meta::TableSchema& table_schema) override;
Status
DeleteTable(const std::string& table_id, const meta::DatesT& dates) override;
DropTable(const std::string& table_id, const meta::DatesT& dates) override;
Status
DescribeTable(meta::TableSchema& table_schema) override;
......@@ -78,7 +78,21 @@ class DBImpl : public DB {
GetTableRowCount(const std::string& table_id, uint64_t& row_count) override;
Status
InsertVectors(const std::string& table_id, uint64_t n, const float* vectors, IDNumbers& vector_ids) override;
CreatePartition(const std::string& table_id, const std::string& partition_name,
const std::string& partition_tag) override;
Status
DropPartition(const std::string& partition_name) override;
Status
DropPartitionByTag(const std::string& table_id, const std::string& partition_tag) override;
Status
ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) override;
Status
InsertVectors(const std::string& table_id, const std::string& partition_tag, uint64_t n, const float* vectors,
IDNumbers& vector_ids) override;
Status
CreateIndex(const std::string& table_id, const TableIndex& index) override;
......@@ -90,16 +104,18 @@ class DBImpl : public DB {
DropIndex(const std::string& table_id) override;
Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
QueryResults& results) override;
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) override;
Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
const meta::DatesT& dates, QueryResults& results) override;
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) override;
Status
Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) override;
QueryByFileID(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) override;
Status
Size(uint64_t& result) override;
......@@ -107,7 +123,7 @@ class DBImpl : public DB {
private:
Status
QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, QueryResults& results);
uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances);
void
BackgroundTimerTask();
......@@ -136,6 +152,28 @@ class DBImpl : public DB {
Status
MemSerialize();
Status
GetFilesToSearch(const std::string& table_id, const std::vector<size_t>& file_ids, meta::TableFilesSchema& files);
Status
GetPartitionsByTags(const std::string& table_id, const std::vector<std::string>& partition_tags,
std::set<std::string>& partition_name_array);
Status
DropTableRecursively(const std::string& table_id, const meta::DatesT& dates);
Status
UpdateTableIndexRecursively(const std::string& table_id, const TableIndex& index);
Status
BuildTableIndexRecursively(const std::string& table_id, const TableIndex& index);
Status
DropTableIndexRecursively(const std::string& table_id);
Status
GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count);
private:
const DBOptions options_;
......
......@@ -19,6 +19,7 @@
#include "db/engine/ExecutionEngine.h"
#include <faiss/Index.h>
#include <stdint.h>
#include <utility>
#include <vector>
......@@ -30,8 +31,8 @@ typedef int64_t IDNumber;
// Pointer-to-ID and list-of-ID aliases built on IDNumber.
typedef IDNumber* IDNumberPtr;
typedef std::vector<IDNumber> IDNumbers;
// One query's hits as (IDNumber, double) pairs, and a list of such per-query results.
// NOTE(review): this listing is a rendered diff without +/- markers; these two
// aliases are presumably the ones removed in favour of ResultIds/ResultDistances
// below — confirm against the real header.
typedef std::vector<std::pair<IDNumber, double>> QueryResult;
typedef std::vector<QueryResult> QueryResults;
// Flat vectors of faiss index ids and faiss distances, kept as two separate containers.
typedef std::vector<faiss::Index::idx_t> ResultIds;
typedef std::vector<faiss::Index::distance_t> ResultDistances;
struct TableIndex {
int32_t engine_type_ = (int)EngineType::FAISS_IDMAP;
......
......@@ -50,14 +50,11 @@ class Meta {
virtual Status
AllTables(std::vector<TableSchema>& table_schema_array) = 0;
virtual Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status
UpdateTableFlag(const std::string& table_id, int64_t flag) = 0;
virtual Status
DeleteTable(const std::string& table_id) = 0;
DropTable(const std::string& table_id) = 0;
virtual Status
DeleteTableFiles(const std::string& table_id) = 0;
......@@ -66,32 +63,47 @@ class Meta {
CreateTableFile(TableFileSchema& file_schema) = 0;
virtual Status
DropPartitionsByDates(const std::string& table_id, const DatesT& dates) = 0;
DropDataByDate(const std::string& table_id, const DatesT& dates) = 0;
virtual Status
GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) = 0;
virtual Status
UpdateTableFile(TableFileSchema& file_schema) = 0;
virtual Status
UpdateTableFiles(TableFilesSchema& files) = 0;
virtual Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status
UpdateTableFilesToIndex(const std::string& table_id) = 0;
virtual Status
UpdateTableFile(TableFileSchema& file_schema) = 0;
DescribeTableIndex(const std::string& table_id, TableIndex& index) = 0;
virtual Status
UpdateTableFiles(TableFilesSchema& files) = 0;
DropTableIndex(const std::string& table_id) = 0;
virtual Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) = 0;
CreatePartition(const std::string& table_name, const std::string& partition_name, const std::string& tag) = 0;
virtual Status
FilesToMerge(const std::string& table_id, DatePartionedTableFilesSchema& files) = 0;
DropPartition(const std::string& partition_name) = 0;
virtual Status
Size(uint64_t& result) = 0;
ShowPartitions(const std::string& table_name, std::vector<meta::TableSchema>& partiton_schema_array) = 0;
virtual Status
Archive() = 0;
GetPartitionName(const std::string& table_name, const std::string& tag, std::string& partition_name) = 0;
virtual Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) = 0;
virtual Status
FilesToMerge(const std::string& table_id, DatePartionedTableFilesSchema& files) = 0;
virtual Status
FilesToIndex(TableFilesSchema&) = 0;
......@@ -101,10 +113,10 @@ class Meta {
std::vector<std::string>& file_ids) = 0;
virtual Status
DescribeTableIndex(const std::string& table_id, TableIndex& index) = 0;
Size(uint64_t& result) = 0;
virtual Status
DropTableIndex(const std::string& table_id) = 0;
Archive() = 0;
virtual Status
CleanUp() = 0;
......
......@@ -19,6 +19,7 @@
#include "db/Constants.h"
#include "db/engine/ExecutionEngine.h"
#include "src/config.h"
#include <map>
#include <memory>
......@@ -33,6 +34,7 @@ constexpr int32_t DEFAULT_ENGINE_TYPE = (int)EngineType::FAISS_IDMAP;
constexpr int32_t DEFAULT_NLIST = 16384;
constexpr int32_t DEFAULT_METRIC_TYPE = (int)MetricType::L2;
constexpr int32_t DEFAULT_INDEX_FILE_SIZE = ONE_GB;
constexpr char CURRENT_VERSION[] = MILVUS_VERSION;
constexpr int64_t FLAG_MASK_NO_USERID = 0x1;
constexpr int64_t FLAG_MASK_HAS_USERID = 0x1 << 1;
......@@ -57,6 +59,9 @@ struct TableSchema {
int32_t engine_type_ = DEFAULT_ENGINE_TYPE;
int32_t nlist_ = DEFAULT_NLIST;
int32_t metric_type_ = DEFAULT_METRIC_TYPE;
std::string owner_table_;
std::string partition_tag_;
std::string version_ = CURRENT_VERSION;
}; // TableSchema
struct TableFileSchema {
......
此差异已折叠。
......@@ -49,7 +49,7 @@ class MySQLMetaImpl : public Meta {
AllTables(std::vector<TableSchema>& table_schema_array) override;
Status
DeleteTable(const std::string& table_id) override;
DropTable(const std::string& table_id) override;
Status
DeleteTableFiles(const std::string& table_id) override;
......@@ -58,21 +58,26 @@ class MySQLMetaImpl : public Meta {
CreateTableFile(TableFileSchema& file_schema) override;
Status
DropPartitionsByDates(const std::string& table_id, const DatesT& dates) override;
DropDataByDate(const std::string& table_id, const DatesT& dates) override;
Status
GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) override;
Status
FilesByType(const std::string& table_id, const std::vector<int>& file_types,
std::vector<std::string>& file_ids) override;
Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) override;
Status
UpdateTableFlag(const std::string& table_id, int64_t flag) override;
Status
UpdateTableFile(TableFileSchema& file_schema) override;
Status
UpdateTableFilesToIndex(const std::string& table_id) override;
Status
UpdateTableFiles(TableFilesSchema& files) override;
Status
DescribeTableIndex(const std::string& table_id, TableIndex& index) override;
......@@ -80,13 +85,16 @@ class MySQLMetaImpl : public Meta {
DropTableIndex(const std::string& table_id) override;
Status
UpdateTableFile(TableFileSchema& file_schema) override;
CreatePartition(const std::string& table_id, const std::string& partition_name, const std::string& tag) override;
Status
UpdateTableFilesToIndex(const std::string& table_id) override;
DropPartition(const std::string& partition_name) override;
Status
UpdateTableFiles(TableFilesSchema& files) override;
ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) override;
Status
GetPartitionName(const std::string& table_id, const std::string& tag, std::string& partition_name) override;
Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
......@@ -98,6 +106,10 @@ class MySQLMetaImpl : public Meta {
Status
FilesToIndex(TableFilesSchema&) override;
Status
FilesByType(const std::string& table_id, const std::vector<int>& file_types,
std::vector<std::string>& file_ids) override;
Status
Archive() override;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -49,13 +49,21 @@ void
SearchJob::SearchDone(size_t index_id) {
// Marks the search over index file `index_id` as finished. mutex_ is held for
// the whole body, so the erase and the notification both happen under the lock.
std::unique_lock<std::mutex> lock(mutex_);
index_files_.erase(index_id);
// NOTE(review): this listing is a rendered diff without +/- markers; the
// unconditional notify on the next line is presumably the removed line and the
// empty() guard that follows is its replacement — confirm against the real file.
cv_.notify_all();
// Wake waiters on cv_ only once no entries remain in index_files_.
if (index_files_.empty()) {
cv_.notify_all();
}
SERVER_LOG_DEBUG << "SearchJob " << id() << " finish index file: " << index_id;
}
ResultSet&
SearchJob::GetResult() {
return result_;
// Returns a mutable reference to this job's result_ids_ member; no copy is made.
ResultIds&
SearchJob::GetResultIds() {
return result_ids_;
}
// Returns a mutable reference to this job's result_distances_ member; no copy is made.
ResultDistances&
SearchJob::GetResultDistances() {
return result_distances_;
}
Status&
......
此差异已折叠。
......@@ -39,8 +39,9 @@ class XSearchTask : public Task {
public:
static void
MergeTopkToResultSet(const std::vector<int64_t>& input_ids, const std::vector<float>& input_distance,
uint64_t input_k, uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultSet& result);
MergeTopkToResultSet(const scheduler::ResultIds& src_ids, const scheduler::ResultDistances& src_distances,
size_t src_k, size_t nq, size_t topk, bool ascending, scheduler::ResultIds& tar_ids,
scheduler::ResultDistances& tar_distances);
// static void
// MergeTopkArray(std::vector<int64_t>& tar_ids, std::vector<float>& tar_distance, uint64_t& tar_input_k,
......
......@@ -17,5 +17,7 @@
# under the License.
#-------------------------------------------------------------------------------
# Collect the source files found in src/sdk/examples/utils into the util_files variable.
aux_source_directory(${MILVUS_SOURCE_DIR}/src/sdk/examples/utils util_files)
# Add the grpcsimple, simple and partition subdirectories to the build.
add_subdirectory(grpcsimple)
add_subdirectory(simple)
add_subdirectory(partition)
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -20,7 +20,7 @@
#include <cstring>
#include <string>
#include "src/ClientTest.h"
#include "sdk/examples/simple/src/ClientTest.h"
void
print_help(const std::string& app_name);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册