提交 0f8ac6dc 编写于 作者: Z zhiru

temp update


Former-commit-id: 6dc00e1940e209fb5e65fb6cf31d8f6f0e0a4c1e
上级 3f43dc04
/*******************************************************************************
* Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
* Unauthorized copying of this file, via any medium is strictly prohibited.
* Proprietary and confidential.
******************************************************************************/
#pragma once
#include "Meta.h"
#include "Options.h"
namespace zilliz {
namespace milvus {
namespace engine {
namespace meta {
// auto StoragePrototype(const std::string& path);
/// Meta implementation for a MySQL-backed metadata store (going by the class
/// name; the method definitions are not part of this header).
///
/// Every public operation overrides the corresponding virtual of Meta and
/// returns a Status. The contract of each operation is the one defined by
/// Meta. NOTE(review): results appear to be handed back through the non-const
/// reference parameters - confirm against Meta.h.
class MySQLMetaImpl : public Meta {
public:
    /// Builds the object from the metadata options.
    /// `explicit` prevents a DBMetaOptions from silently converting into a
    /// MySQLMetaImpl at a call site.
    explicit MySQLMetaImpl(const DBMetaOptions& options_);

    // ---- table operations ------------------------------------------------
    Status CreateTable(TableSchema& table_schema) override;
    Status DeleteTable(const std::string& table_id) override;
    Status DescribeTable(TableSchema& group_info_) override;
    Status HasTable(const std::string& table_id, bool& has_or_not) override;
    Status AllTables(std::vector<TableSchema>& table_schema_array) override;

    // ---- table-file operations -------------------------------------------
    Status CreateTableFile(TableFileSchema& file_schema) override;
    Status DropPartitionsByDates(const std::string& table_id,
                                 const DatesT& dates) override;
    Status GetTableFile(TableFileSchema& file_schema) override;
    Status UpdateTableFile(TableFileSchema& file_schema) override;
    Status UpdateTableFiles(TableFilesSchema& files) override;

    // ---- file selection --------------------------------------------------
    Status FilesToSearch(const std::string& table_id,
                         const DatesT& partition,
                         DatePartionedTableFilesSchema& files) override;
    Status FilesToMerge(const std::string& table_id,
                        DatePartionedTableFilesSchema& files) override;
    Status FilesToDelete(const std::string& table_id,
                         const DatesT& partition,
                         DatePartionedTableFilesSchema& files) override;
    Status FilesToIndex(TableFilesSchema&) override;

    // ---- maintenance and statistics --------------------------------------
    Status Archive() override;
    Status Size(uint64_t& result) override;
    Status CleanUp() override;
    Status CleanUpFilesWithTTL(uint16_t seconds) override;
    Status DropAll() override;
    Status Count(const std::string& table_id, uint64_t& result) override;

    // Kept as `virtual` rather than `override`: Meta's destructor is not
    // visible from this header, so its virtual-ness cannot be assumed here.
    virtual ~MySQLMetaImpl();

private:
    // Internal helpers; their definitions live outside this header.
    Status NextFileId(std::string& file_id);
    Status NextTableId(std::string& table_id);
    Status DiscardFiles(long to_discard_size);
    std::string GetTablePath(const std::string& table_id);
    std::string GetTableDatePartitionPath(const std::string& table_id, DateT& date);
    void GetTableFilePath(TableFileSchema& group_file);
    Status Initialize();

    // Immutable copy of the options held by value for the object's lifetime.
    const DBMetaOptions options_;
}; // MySQLMetaImpl
} // namespace meta
} // namespace engine
} // namespace milvus
} // namespace zilliz
......@@ -37,12 +37,6 @@ struct DBMetaOptions {
ArchiveConf archive_conf = ArchiveConf("delete");
}; // DBMetaOptions
// Options holder for the MySQL meta backend (judging by the name only).
// NOTE(review): nothing visible here says what `path` refers to (filesystem
// location vs. connection URI) - confirm with the code that reads it.
struct MySQLMetaOptions {
std::string path;
};
struct Options {
Options();
uint16_t memory_sync_interval = 1;
......
......@@ -9,130 +9,144 @@
#include "wrapper/Operand.h"
#include "wrapper/Index.h"
#include "wrapper/IndexBuilder.h"
#include <regex>
using namespace zilliz::milvus::engine;
TEST(xxx, Wrapper_Test){
// std::string xxx = "dialect+driver://username:password@host:port/database";
// Serialises an Operand with operand_to_str(), parses it back with
// str_to_operand(), and checks that get_index_type(10000) gives the same
// answer for the original and the round-tripped operand.
TEST(operand_test, Wrapper_Test) {
    auto opd = std::make_shared<Operand>();
    opd->index_type = "IVF";
    opd->preproc = "OPQ";
    opd->postproc = "PQ";
    opd->metric_type = "L2";
    opd->d = 64;

    auto opd_str = operand_to_str(opd);
    auto new_opd = str_to_operand(opd_str);

    // TODO: fix all places where opd is used to build an index.
    // A gtest assertion is used instead of assert(): assert() disappears when
    // NDEBUG is defined and, when it does fire, aborts the whole test binary
    // instead of recording a failure for this test.
    ASSERT_EQ(new_opd->get_index_type(10000), opd->get_index_type(10000));
}
// Trains an "IVF" index on random 3-d vectors, adds nb entries with ids, then
// searches around (0.5, 0.5, 0.5) and expects every hit to have positive
// coordinates.
//
// NOTE(review): this span reads like two versions of the file merged without
// diff markers. The statements using littel_xx / xx_regex / pieces_match do not
// interact with the index code around them and look like the body of the
// TEST(xxx, Wrapper_Test) that is opened earlier in the file. The braces do not
// balance as written (the "prepare raw data" loop shares its closing brace with
// the pieces_match loop), so the two tests must be untangled before this
// compiles.
TEST(build_test, Wrapper_Test) {
// dimension of the vectors to index
int d = 3;
// make a set of nt training vectors in the unit cube
size_t nt = 10000;
// a reasonable number of centroids to index nb vectors
int ncentroids = 16;
std::random_device rd;
std::mt19937 gen(rd());
std::vector<float> xb;
std::vector<long> ids;
// prepare train data: nt * d floats drawn uniformly from [-1, 1)
std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
std::vector<float> xt(nt * d);
for (size_t i = 0; i < nt * d; i++) {
xt[i] = dis_xt(gen);
}
// train the index; xb and ids are still empty at this point
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF";
opd->d = d;
opd->ncent = ncentroids;
IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt);
ASSERT_TRUE(index_1 != nullptr);
// example of the URI shape the regex below targets:
//mysql://scott:tiger@localhost/mydatabase
// size of the database we plan to index
size_t nb = 100000;
std::string littel_xx = "dixx://";
// NOTE(review): after string-literal unescaping every bracket expression
// contains `_-\.`, which the default ECMAScript grammar parses as a range from
// '_' (0x5F) down to '.' (0x2E). That range is reversed, so constructing this
// std::regex is expected to throw std::regex_error (error_range). Moving '-'
// to the end of each class, or escaping it, would avoid that.
std::regex xx_regex("([a-zA-Z0-9_-\\.]*):\\/\\/([a-zA-Z0-9_-\\.]*)\\:([a-zA-Z0-9_-\\.]*)\\@([a-zA-Z0-9_-\\.]*)\\:([0-9]*)\\/([a-zA-Z0-9_-\\.]*)");
std::smatch pieces_match;
// The bool result of regex_match is ignored; the loop further down simply
// prints whatever sub-matches were recorded.
std::regex_match(littel_xx, pieces_match, xx_regex);
// prepare raw data
// NOTE(review): xb gets nb floats, not nb * d. With d == 3 the later reads of
// xb[id * 3 + 2] go past the end for any id >= nb / 3, and add_with_ids
// presumably expects nb * d floats as well - confirm and resize accordingly.
xb.resize(nb);
ids.resize(nb);
for (size_t i = 0; i < nb; i++) {
xb[i] = dis_xt(gen);
ids[i] = i;
// print each sub-match captured by the regex
for(auto &x : pieces_match) {
std::cout << "hehhe: " << x.str() << std::endl;
}
index_1->add_with_ids(nb, xb.data(), ids.data());
// search in the first quadrant: one query, ten nearest neighbours
int nq = 1, k = 10;
std::vector<float> xq = {0.5, 0.5, 0.5};
// NOTE(review): raw new[] buffers; a failing ASSERT_* returns from the test
// body before the delete[] lines at the bottom, leaking both arrays.
// std::vector would remove the manual cleanup.
float *result_dists = new float[k];
long *result_ids = new long[k];
index_1->search(nq, xq.data(), k, result_dists, result_ids);
for (int i = 0; i < k; i++) {
// a negative id is treated as a failed search
if (result_ids[i] < 0) {
ASSERT_TRUE(false);
break;
}
long id = result_ids[i];
std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", "
<< xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl;
// make sure the result vector is in the first quadrant
ASSERT_TRUE(xb[id * 3] > 0.0);
ASSERT_TRUE(xb[id * 3 + 1] > 0.0);
ASSERT_TRUE(xb[id * 3 + 2] > 0.0);
}
delete[] result_dists;
delete[] result_ids;
}
// Builds an "IVF" index (d = 256, ncent = 256) over nb random vectors through
// the pointer-based build_all overload, checks the reported size and
// dimension, then searches for the first five base vectors and expects each
// one to come back as its own nearest neighbour.
TEST(gpu_build_test, Wrapper_Test) {
    using std::vector;

    int d = 256;
    int nb = 3 * 1000 * 100;
    int nq = 100;
    vector<float> xb(d * nb);
    vector<float> xq(d * nq);
    vector<long> ids(nb);

    // Fill base and query vectors with uniform values in [-1, 1); the
    // distribution produces double, hence the explicit narrowing cast.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (auto &e : xb) { e = static_cast<float>(dis_xt(gen)); }
    for (auto &e : xq) { e = static_cast<float>(dis_xt(gen)); }
    for (int i = 0; i < nb; ++i) { ids[i] = i; }

    auto opd = std::make_shared<Operand>();
    opd->index_type = "IVF";
    opd->d = d;
    opd->ncent = 256;

    IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
    auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data());

    // gtest assertions instead of assert(): assert() is compiled out under
    // NDEBUG and aborts the whole binary rather than failing just this test.
    ASSERT_EQ(index_1->ntotal, nb);
    ASSERT_EQ(index_1->dim, d);

    // sanity check: search the first 5 vectors of xb
    int k = 1;
    vector<long> I(5 * k);
    vector<float> D(5 * k);
    index_1->search(5, xb.data(), k, D.data(), I.data());
    for (int i = 0; i < 5; ++i) { ASSERT_EQ(i, I[i]); }
}
//TEST(operand_test, Wrapper_Test) {
// using std::cout;
// using std::endl;
//
// auto opd = std::make_shared<Operand>();
// opd->index_type = "IVF";
// opd->preproc = "OPQ";
// opd->postproc = "PQ";
// opd->metric_type = "L2";
// opd->d = 64;
//
// auto opd_str = operand_to_str(opd);
// auto new_opd = str_to_operand(opd_str);
//
// // TODO: fix all place where using opd to build index.
// assert(new_opd->get_index_type(10000) == opd->get_index_type(10000));
//}
//
//TEST(build_test, Wrapper_Test) {
// // dimension of the vectors to index
// int d = 3;
//
// // make a set of nt training vectors in the unit cube
// size_t nt = 10000;
//
//     // a reasonable number of centroids to index nb vectors
// int ncentroids = 16;
//
// std::random_device rd;
// std::mt19937 gen(rd());
//
// std::vector<float> xb;
// std::vector<long> ids;
//
// //prepare train data
// std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
// std::vector<float> xt(nt * d);
// for (size_t i = 0; i < nt * d; i++) {
// xt[i] = dis_xt(gen);
// }
//
// //train the index
// auto opd = std::make_shared<Operand>();
// opd->index_type = "IVF";
// opd->d = d;
// opd->ncent = ncentroids;
// IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
// auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt);
// ASSERT_TRUE(index_1 != nullptr);
//
// // size of the database we plan to index
// size_t nb = 100000;
//
// //prepare raw data
// xb.resize(nb);
// ids.resize(nb);
// for (size_t i = 0; i < nb; i++) {
// xb[i] = dis_xt(gen);
// ids[i] = i;
// }
// index_1->add_with_ids(nb, xb.data(), ids.data());
//
// //search in first quadrant
// int nq = 1, k = 10;
// std::vector<float> xq = {0.5, 0.5, 0.5};
// float *result_dists = new float[k];
// long *result_ids = new long[k];
// index_1->search(nq, xq.data(), k, result_dists, result_ids);
//
// for (int i = 0; i < k; i++) {
// if (result_ids[i] < 0) {
// ASSERT_TRUE(false);
// break;
// }
//
// long id = result_ids[i];
// std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", "
// << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl;
//
//         // make sure result vector is in first quadrant
// ASSERT_TRUE(xb[id * 3] > 0.0);
// ASSERT_TRUE(xb[id * 3 + 1] > 0.0);
// ASSERT_TRUE(xb[id * 3 + 2] > 0.0);
// }
//
// delete[] result_dists;
// delete[] result_ids;
//}
//
//TEST(gpu_build_test, Wrapper_Test) {
// using std::vector;
//
// int d = 256;
// int nb = 3 * 1000 * 100;
// int nq = 100;
// vector<float> xb(d * nb);
// vector<float> xq(d * nq);
// vector<long> ids(nb);
//
// std::random_device rd;
// std::mt19937 gen(rd());
// std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
// for (auto &e : xb) { e = float(dis_xt(gen)); }
// for (auto &e : xq) { e = float(dis_xt(gen)); }
// for (int i = 0; i < nb; ++i) { ids[i] = i; }
//
// auto opd = std::make_shared<Operand>();
// opd->index_type = "IVF";
// opd->d = d;
// opd->ncent = 256;
//
// IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
// auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data());
// assert(index_1->ntotal == nb);
// assert(index_1->dim == d);
//
// // sanity check: search 5 first vectors of xb
// int k = 1;
// vector<long> I(5 * k);
// vector<float> D(5 * k);
// index_1->search(5, xb.data(), k, D.data(), I.data());
// for (int i = 0; i < 5; ++i) { assert(i == I[i]); }
//}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册