提交 dbe90c7a 编写于 作者: G groot

support table partition

上级 b4189371
...@@ -8,6 +8,7 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -8,6 +8,7 @@ Please mark all change in change log and use the ticket from JIRA.
## Feature ## Feature
- \#12 - Pure CPU version for Milvus - \#12 - Pure CPU version for Milvus
- \#77 - Support table partition
## Improvement ## Improvement
......
## Data Migration
#### 0.3.x
Legacy data cannot be migrated to later versions.
#### 0.4.x
Legacy data can be reused directly by 0.5.x.
Legacy data can be migrated to 0.6.x.
#### 0.5.x
Legacy data can be migrated to 0.6.x.
#### 0.6.x
How to migrate legacy 0.4.x/0.5.x data:
For SQLite meta:
```shell
$ sqlite3 [path_to]/meta.sqlite < sqlite_4_to_6.sql
```
For MySQL meta:
```shell
$ mysql -h127.0.0.1 -uroot -p123456 -Dmilvus < mysql_4_to_6.sql
```
alter table Tables add column owner_table VARCHAR(255) DEFAULT '' NOT NULL;
alter table Tables add column partition_tag VARCHAR(255) DEFAULT '' NOT NULL;
alter table Tables add column version VARCHAR(64) DEFAULT '0.6.0' NOT NULL;
update Tables set version='0.6.0';
alter table Tables add column 'owner_table' TEXT DEFAULT '' NOT NULL;
alter table Tables add column 'partition_tag' TEXT DEFAULT '' NOT NULL;
alter table Tables add column 'version' TEXT DEFAULT '0.6.0' NOT NULL;
update Tables set version='0.6.0';
...@@ -47,43 +47,68 @@ class DB { ...@@ -47,43 +47,68 @@ class DB {
virtual Status virtual Status
CreateTable(meta::TableSchema& table_schema_) = 0; CreateTable(meta::TableSchema& table_schema_) = 0;
virtual Status virtual Status
DeleteTable(const std::string& table_id, const meta::DatesT& dates) = 0; DropTable(const std::string& table_id, const meta::DatesT& dates) = 0;
virtual Status virtual Status
DescribeTable(meta::TableSchema& table_schema_) = 0; DescribeTable(meta::TableSchema& table_schema_) = 0;
virtual Status virtual Status
HasTable(const std::string& table_id, bool& has_or_not_) = 0; HasTable(const std::string& table_id, bool& has_or_not_) = 0;
virtual Status virtual Status
AllTables(std::vector<meta::TableSchema>& table_schema_array) = 0; AllTables(std::vector<meta::TableSchema>& table_schema_array) = 0;
virtual Status virtual Status
GetTableRowCount(const std::string& table_id, uint64_t& row_count) = 0; GetTableRowCount(const std::string& table_id, uint64_t& row_count) = 0;
virtual Status virtual Status
PreloadTable(const std::string& table_id) = 0; PreloadTable(const std::string& table_id) = 0;
virtual Status virtual Status
UpdateTableFlag(const std::string& table_id, int64_t flag) = 0; UpdateTableFlag(const std::string& table_id, int64_t flag) = 0;
virtual Status virtual Status
InsertVectors(const std::string& table_id_, uint64_t n, const float* vectors, IDNumbers& vector_ids_) = 0; CreatePartition(const std::string& table_id, const std::string& partition_name,
const std::string& partition_tag) = 0;
virtual Status
DropPartition(const std::string& partition_name) = 0;
virtual Status virtual Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, DropPartitionByTag(const std::string& table_id, const std::string& partition_tag) = 0;
QueryResults& results) = 0;
virtual Status virtual Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) = 0;
const meta::DatesT& dates, QueryResults& results) = 0;
virtual Status virtual Status
Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq, InsertVectors(const std::string& table_id, const std::string& partition_tag, uint64_t n, const float* vectors,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0; IDNumbers& vector_ids_) = 0;
virtual Status
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) = 0;
virtual Status
Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) = 0;
virtual Status
QueryByFileID(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) = 0;
virtual Status virtual Status
Size(uint64_t& result) = 0; Size(uint64_t& result) = 0;
virtual Status virtual Status
CreateIndex(const std::string& table_id, const TableIndex& index) = 0; CreateIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status virtual Status
DescribeIndex(const std::string& table_id, TableIndex& index) = 0; DescribeIndex(const std::string& table_id, TableIndex& index) = 0;
virtual Status virtual Status
DropIndex(const std::string& table_id) = 0; DropIndex(const std::string& table_id) = 0;
......
此差异已折叠。
...@@ -57,7 +57,7 @@ class DBImpl : public DB { ...@@ -57,7 +57,7 @@ class DBImpl : public DB {
CreateTable(meta::TableSchema& table_schema) override; CreateTable(meta::TableSchema& table_schema) override;
Status Status
DeleteTable(const std::string& table_id, const meta::DatesT& dates) override; DropTable(const std::string& table_id, const meta::DatesT& dates) override;
Status Status
DescribeTable(meta::TableSchema& table_schema) override; DescribeTable(meta::TableSchema& table_schema) override;
...@@ -78,7 +78,21 @@ class DBImpl : public DB { ...@@ -78,7 +78,21 @@ class DBImpl : public DB {
GetTableRowCount(const std::string& table_id, uint64_t& row_count) override; GetTableRowCount(const std::string& table_id, uint64_t& row_count) override;
Status Status
InsertVectors(const std::string& table_id, uint64_t n, const float* vectors, IDNumbers& vector_ids) override; CreatePartition(const std::string& table_id, const std::string& partition_name,
const std::string& partition_tag) override;
Status
DropPartition(const std::string& partition_name) override;
Status
DropPartitionByTag(const std::string& table_id, const std::string& partition_tag) override;
Status
ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) override;
Status
InsertVectors(const std::string& table_id, const std::string& partition_tag, uint64_t n, const float* vectors,
IDNumbers& vector_ids) override;
Status Status
CreateIndex(const std::string& table_id, const TableIndex& index) override; CreateIndex(const std::string& table_id, const TableIndex& index) override;
...@@ -90,16 +104,18 @@ class DBImpl : public DB { ...@@ -90,16 +104,18 @@ class DBImpl : public DB {
DropIndex(const std::string& table_id) override; DropIndex(const std::string& table_id) override;
Status Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
QueryResults& results) override; uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) override;
Status Status
Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
const meta::DatesT& dates, QueryResults& results) override; uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) override;
Status Status
Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq, QueryByFileID(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) override; uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
ResultDistances& result_distances) override;
Status Status
Size(uint64_t& result) override; Size(uint64_t& result) override;
...@@ -107,7 +123,7 @@ class DBImpl : public DB { ...@@ -107,7 +123,7 @@ class DBImpl : public DB {
private: private:
Status Status
QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq, QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
uint64_t nprobe, const float* vectors, QueryResults& results); uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances);
void void
BackgroundTimerTask(); BackgroundTimerTask();
...@@ -136,6 +152,28 @@ class DBImpl : public DB { ...@@ -136,6 +152,28 @@ class DBImpl : public DB {
Status Status
MemSerialize(); MemSerialize();
Status
GetFilesToSearch(const std::string& table_id, const std::vector<size_t>& file_ids, meta::TableFilesSchema& files);
Status
GetPartitionsByTags(const std::string& table_id, const std::vector<std::string>& partition_tags,
std::set<std::string>& partition_name_array);
Status
DropTableRecursively(const std::string& table_id, const meta::DatesT& dates);
Status
UpdateTableIndexRecursively(const std::string& table_id, const TableIndex& index);
Status
BuildTableIndexRecursively(const std::string& table_id, const TableIndex& index);
Status
DropTableIndexRecursively(const std::string& table_id);
Status
GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count);
private: private:
const DBOptions options_; const DBOptions options_;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "db/engine/ExecutionEngine.h" #include "db/engine/ExecutionEngine.h"
#include <faiss/Index.h>
#include <stdint.h> #include <stdint.h>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -30,8 +31,8 @@ typedef int64_t IDNumber; ...@@ -30,8 +31,8 @@ typedef int64_t IDNumber;
typedef IDNumber* IDNumberPtr; typedef IDNumber* IDNumberPtr;
typedef std::vector<IDNumber> IDNumbers; typedef std::vector<IDNumber> IDNumbers;
typedef std::vector<std::pair<IDNumber, double>> QueryResult; typedef std::vector<faiss::Index::idx_t> ResultIds;
typedef std::vector<QueryResult> QueryResults; typedef std::vector<faiss::Index::distance_t> ResultDistances;
struct TableIndex { struct TableIndex {
int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; int32_t engine_type_ = (int)EngineType::FAISS_IDMAP;
......
...@@ -50,14 +50,11 @@ class Meta { ...@@ -50,14 +50,11 @@ class Meta {
virtual Status virtual Status
AllTables(std::vector<TableSchema>& table_schema_array) = 0; AllTables(std::vector<TableSchema>& table_schema_array) = 0;
virtual Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status virtual Status
UpdateTableFlag(const std::string& table_id, int64_t flag) = 0; UpdateTableFlag(const std::string& table_id, int64_t flag) = 0;
virtual Status virtual Status
DeleteTable(const std::string& table_id) = 0; DropTable(const std::string& table_id) = 0;
virtual Status virtual Status
DeleteTableFiles(const std::string& table_id) = 0; DeleteTableFiles(const std::string& table_id) = 0;
...@@ -66,32 +63,47 @@ class Meta { ...@@ -66,32 +63,47 @@ class Meta {
CreateTableFile(TableFileSchema& file_schema) = 0; CreateTableFile(TableFileSchema& file_schema) = 0;
virtual Status virtual Status
DropPartitionsByDates(const std::string& table_id, const DatesT& dates) = 0; DropDataByDate(const std::string& table_id, const DatesT& dates) = 0;
virtual Status virtual Status
GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) = 0; GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) = 0;
virtual Status
UpdateTableFile(TableFileSchema& file_schema) = 0;
virtual Status
UpdateTableFiles(TableFilesSchema& files) = 0;
virtual Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) = 0;
virtual Status virtual Status
UpdateTableFilesToIndex(const std::string& table_id) = 0; UpdateTableFilesToIndex(const std::string& table_id) = 0;
virtual Status virtual Status
UpdateTableFile(TableFileSchema& file_schema) = 0; DescribeTableIndex(const std::string& table_id, TableIndex& index) = 0;
virtual Status virtual Status
UpdateTableFiles(TableFilesSchema& files) = 0; DropTableIndex(const std::string& table_id) = 0;
virtual Status virtual Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates, CreatePartition(const std::string& table_name, const std::string& partition_name, const std::string& tag) = 0;
DatePartionedTableFilesSchema& files) = 0;
virtual Status virtual Status
FilesToMerge(const std::string& table_id, DatePartionedTableFilesSchema& files) = 0; DropPartition(const std::string& partition_name) = 0;
virtual Status virtual Status
Size(uint64_t& result) = 0; ShowPartitions(const std::string& table_name, std::vector<meta::TableSchema>& partiton_schema_array) = 0;
virtual Status virtual Status
Archive() = 0; GetPartitionName(const std::string& table_name, const std::string& tag, std::string& partition_name) = 0;
virtual Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
DatePartionedTableFilesSchema& files) = 0;
virtual Status
FilesToMerge(const std::string& table_id, DatePartionedTableFilesSchema& files) = 0;
virtual Status virtual Status
FilesToIndex(TableFilesSchema&) = 0; FilesToIndex(TableFilesSchema&) = 0;
...@@ -101,10 +113,10 @@ class Meta { ...@@ -101,10 +113,10 @@ class Meta {
std::vector<std::string>& file_ids) = 0; std::vector<std::string>& file_ids) = 0;
virtual Status virtual Status
DescribeTableIndex(const std::string& table_id, TableIndex& index) = 0; Size(uint64_t& result) = 0;
virtual Status virtual Status
DropTableIndex(const std::string& table_id) = 0; Archive() = 0;
virtual Status virtual Status
CleanUp() = 0; CleanUp() = 0;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "db/Constants.h" #include "db/Constants.h"
#include "db/engine/ExecutionEngine.h" #include "db/engine/ExecutionEngine.h"
#include "src/config.h"
#include <map> #include <map>
#include <memory> #include <memory>
...@@ -33,6 +34,7 @@ constexpr int32_t DEFAULT_ENGINE_TYPE = (int)EngineType::FAISS_IDMAP; ...@@ -33,6 +34,7 @@ constexpr int32_t DEFAULT_ENGINE_TYPE = (int)EngineType::FAISS_IDMAP;
constexpr int32_t DEFAULT_NLIST = 16384; constexpr int32_t DEFAULT_NLIST = 16384;
constexpr int32_t DEFAULT_METRIC_TYPE = (int)MetricType::L2; constexpr int32_t DEFAULT_METRIC_TYPE = (int)MetricType::L2;
constexpr int32_t DEFAULT_INDEX_FILE_SIZE = ONE_GB; constexpr int32_t DEFAULT_INDEX_FILE_SIZE = ONE_GB;
constexpr char CURRENT_VERSION[] = MILVUS_VERSION;
constexpr int64_t FLAG_MASK_NO_USERID = 0x1; constexpr int64_t FLAG_MASK_NO_USERID = 0x1;
constexpr int64_t FLAG_MASK_HAS_USERID = 0x1 << 1; constexpr int64_t FLAG_MASK_HAS_USERID = 0x1 << 1;
...@@ -57,6 +59,9 @@ struct TableSchema { ...@@ -57,6 +59,9 @@ struct TableSchema {
int32_t engine_type_ = DEFAULT_ENGINE_TYPE; int32_t engine_type_ = DEFAULT_ENGINE_TYPE;
int32_t nlist_ = DEFAULT_NLIST; int32_t nlist_ = DEFAULT_NLIST;
int32_t metric_type_ = DEFAULT_METRIC_TYPE; int32_t metric_type_ = DEFAULT_METRIC_TYPE;
std::string owner_table_;
std::string partition_tag_;
std::string version_ = CURRENT_VERSION;
}; // TableSchema }; // TableSchema
struct TableFileSchema { struct TableFileSchema {
......
此差异已折叠。
...@@ -49,7 +49,7 @@ class MySQLMetaImpl : public Meta { ...@@ -49,7 +49,7 @@ class MySQLMetaImpl : public Meta {
AllTables(std::vector<TableSchema>& table_schema_array) override; AllTables(std::vector<TableSchema>& table_schema_array) override;
Status Status
DeleteTable(const std::string& table_id) override; DropTable(const std::string& table_id) override;
Status Status
DeleteTableFiles(const std::string& table_id) override; DeleteTableFiles(const std::string& table_id) override;
...@@ -58,21 +58,26 @@ class MySQLMetaImpl : public Meta { ...@@ -58,21 +58,26 @@ class MySQLMetaImpl : public Meta {
CreateTableFile(TableFileSchema& file_schema) override; CreateTableFile(TableFileSchema& file_schema) override;
Status Status
DropPartitionsByDates(const std::string& table_id, const DatesT& dates) override; DropDataByDate(const std::string& table_id, const DatesT& dates) override;
Status Status
GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) override; GetTableFiles(const std::string& table_id, const std::vector<size_t>& ids, TableFilesSchema& table_files) override;
Status
FilesByType(const std::string& table_id, const std::vector<int>& file_types,
std::vector<std::string>& file_ids) override;
Status Status
UpdateTableIndex(const std::string& table_id, const TableIndex& index) override; UpdateTableIndex(const std::string& table_id, const TableIndex& index) override;
Status Status
UpdateTableFlag(const std::string& table_id, int64_t flag) override; UpdateTableFlag(const std::string& table_id, int64_t flag) override;
Status
UpdateTableFile(TableFileSchema& file_schema) override;
Status
UpdateTableFilesToIndex(const std::string& table_id) override;
Status
UpdateTableFiles(TableFilesSchema& files) override;
Status Status
DescribeTableIndex(const std::string& table_id, TableIndex& index) override; DescribeTableIndex(const std::string& table_id, TableIndex& index) override;
...@@ -80,13 +85,16 @@ class MySQLMetaImpl : public Meta { ...@@ -80,13 +85,16 @@ class MySQLMetaImpl : public Meta {
DropTableIndex(const std::string& table_id) override; DropTableIndex(const std::string& table_id) override;
Status Status
UpdateTableFile(TableFileSchema& file_schema) override; CreatePartition(const std::string& table_id, const std::string& partition_name, const std::string& tag) override;
Status Status
UpdateTableFilesToIndex(const std::string& table_id) override; DropPartition(const std::string& partition_name) override;
Status Status
UpdateTableFiles(TableFilesSchema& files) override; ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) override;
Status
GetPartitionName(const std::string& table_id, const std::string& tag, std::string& partition_name) override;
Status Status
FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates, FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
...@@ -98,6 +106,10 @@ class MySQLMetaImpl : public Meta { ...@@ -98,6 +106,10 @@ class MySQLMetaImpl : public Meta {
Status Status
FilesToIndex(TableFilesSchema&) override; FilesToIndex(TableFilesSchema&) override;
Status
FilesByType(const std::string& table_id, const std::vector<int>& file_types,
std::vector<std::string>& file_ids) override;
Status Status
Archive() override; Archive() override;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -49,13 +49,21 @@ void ...@@ -49,13 +49,21 @@ void
SearchJob::SearchDone(size_t index_id) { SearchJob::SearchDone(size_t index_id) {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
index_files_.erase(index_id); index_files_.erase(index_id);
cv_.notify_all(); if (index_files_.empty()) {
cv_.notify_all();
}
SERVER_LOG_DEBUG << "SearchJob " << id() << " finish index file: " << index_id; SERVER_LOG_DEBUG << "SearchJob " << id() << " finish index file: " << index_id;
} }
ResultSet& ResultIds&
SearchJob::GetResult() { SearchJob::GetResultIds() {
return result_; return result_ids_;
}
ResultDistances&
SearchJob::GetResultDistances() {
return result_distances_;
} }
Status& Status&
......
此差异已折叠。
...@@ -39,8 +39,9 @@ class XSearchTask : public Task { ...@@ -39,8 +39,9 @@ class XSearchTask : public Task {
public: public:
static void static void
MergeTopkToResultSet(const std::vector<int64_t>& input_ids, const std::vector<float>& input_distance, MergeTopkToResultSet(const scheduler::ResultIds& src_ids, const scheduler::ResultDistances& src_distances,
uint64_t input_k, uint64_t nq, uint64_t topk, bool ascending, scheduler::ResultSet& result); size_t src_k, size_t nq, size_t topk, bool ascending, scheduler::ResultIds& tar_ids,
scheduler::ResultDistances& tar_distances);
// static void // static void
// MergeTopkArray(std::vector<int64_t>& tar_ids, std::vector<float>& tar_distance, uint64_t& tar_input_k, // MergeTopkArray(std::vector<int64_t>& tar_ids, std::vector<float>& tar_distance, uint64_t& tar_input_k,
......
...@@ -17,5 +17,7 @@ ...@@ -17,5 +17,7 @@
# under the License. # under the License.
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
aux_source_directory(${MILVUS_SOURCE_DIR}/src/sdk/examples/utils util_files)
add_subdirectory(grpcsimple) add_subdirectory(simple)
add_subdirectory(partition)
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include "src/ClientTest.h" #include "sdk/examples/simple/src/ClientTest.h"
void void
print_help(const std::string& app_name); print_help(const std::string& app_name);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册