提交 44f5d3e7 编写于 作者: X Xu Peng

feat(db): meta operation


Former-commit-id: 6f7cd4ec069195d659585b321fc5f6e6f20bb719
上级 6e1e2828
......@@ -104,7 +104,9 @@ void DBImpl::background_call() {
Status DBImpl::merge_files(const std::string& group_id, const meta::DateT& date,
const meta::GroupFilesSchema& files) {
meta::GroupFileSchema group_file;
Status status = _pMeta->add_group_file(group_id, date, group_file);
group_file.group_id = group_id;
group_file.date = date;
Status status = _pMeta->add_group_file(group_file);
if (!status.ok()) {
return status;
}
......@@ -161,8 +163,9 @@ Status DBImpl::background_merge_files(const std::string& group_id) {
Status DBImpl::build_index(const meta::GroupFileSchema& file) {
meta::GroupFileSchema group_file;
Status status = _pMeta->add_group_file(file.group_id, file.date,
group_file, meta::GroupFileSchema::INDEX);
group_file.group_id = file.group_id;
group_file.date = file.date;
Status status = _pMeta->add_group_file(group_file);
if (!status.ok()) {
return status;
}
......@@ -176,16 +179,14 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) {
dynamic_cast<faiss::IndexFlat*>(from_index->index)->xb.data(),
from_index->id_map.data());
/* std::cout << "raw size=" << from_index->ntotal << " index size=" << index->ntotal << std::endl; */
// PXU TODO: Remove
/* auto location = group_file.location + ".index"; */
write_index(index, group_file.location.c_str());
group_file.file_type = meta::GroupFileSchema::INDEX;
/* auto to_remove = file; */
/* to_remove.file_type = TO_DELETE; */
auto to_remove = file;
to_remove.file_type = meta::GroupFileSchema::TO_DELETE;
/* GroupFilesSchema update_files = {to_remove, group_file}; */
/* _pMeta->update_files(update_files); */
meta::GroupFilesSchema update_files = {to_remove, group_file};
_pMeta->update_files(update_files);
return Status::OK();
}
......
......@@ -17,11 +17,19 @@ using namespace sqlite_orm;
inline auto StoragePrototype(const std::string& path) {
return make_storage(path,
make_table("Groups",
make_table("Group",
make_column("id", &GroupSchema::id, primary_key()),
make_column("group_id", &GroupSchema::group_id, unique()),
make_column("dimension", &GroupSchema::dimension),
make_column("files_cnt", &GroupSchema::files_cnt, default_value(0))));
make_column("files_cnt", &GroupSchema::files_cnt, default_value(0))),
make_table("GroupFile",
make_column("id", &GroupFileSchema::id, primary_key()),
make_column("group_id", &GroupFileSchema::group_id),
make_column("file_id", &GroupFileSchema::file_id),
make_column("file_type", &GroupFileSchema::file_type),
make_column("rows", &GroupFileSchema::rows, default_value(0)),
make_column("date", &GroupFileSchema::date))
);
}
......@@ -35,6 +43,26 @@ long GetFileSize(const std::string& filename)
return rc == 0 ? stat_buf.st_size : -1;
}
// Returns the path for a group: _options.path and group_id joined by "/".
std::string DBMetaImpl::GetGroupPath(const std::string& group_id) {
    std::string group_path = _options.path;
    group_path += "/";
    group_path += group_id;
    return group_path;
}
// Returns the group's path (see GetGroupPath) with "/<date>" appended.
// `date` is only read here; it is streamed in its default numeric form.
std::string DBMetaImpl::GetGroupDatePartitionPath(const std::string& group_id, DateT& date) {
    std::stringstream partition_path;
    partition_path << GetGroupPath(group_id);
    partition_path << "/";
    partition_path << date;
    return partition_path.str();
}
// Fills in group_file.location as "<date partition path>/<file_id>".
// If group_file.date is still EmptyDate it is first replaced with
// Meta::GetDate(), so the caller's object may have its date modified too.
void DBMetaImpl::GetGroupFilePath(GroupFileSchema& group_file) {
    const bool date_unset = (group_file.date == EmptyDate);
    if (date_unset) {
        group_file.date = Meta::GetDate();
    }

    std::stringstream file_path;
    file_path << GetGroupDatePartitionPath(group_file.group_id, group_file.date);
    file_path << "/";
    file_path << group_file.file_id;
    group_file.location = file_path.str();
}
DBMetaImpl::DBMetaImpl(const DBMetaOptions& options_)
: _options(options_) {
initialize();
......@@ -103,74 +131,105 @@ Status DBMetaImpl::has_group(const std::string& group_id, bool& has_or_not) {
return Status::OK();
}
// Convenience overload: adds a group file dated with Meta::GetDate().
//
// group_id        - id of the group the file belongs to.
// group_file_info - in/out record passed on to the dated overload.
// file_type       - type of file to create; forwarded unchanged.
//
// Returns whatever the dated overload returns.
Status DBMetaImpl::add_group_file(const std::string& group_id,
                                  GroupFileSchema& group_file_info,
                                  GroupFileSchema::FILE_TYPE file_type) {
    // Forward file_type explicitly; omitting it would silently fall back to
    // the dated overload's default argument regardless of what was requested.
    return add_group_file(group_id, Meta::GetDate(), group_file_info, file_type);
}
Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) {
if (group_file.date == EmptyDate) {
group_file.date = Meta::GetDate();
}
GroupSchema group_info;
group_info.group_id = group_file.group_id;
auto status = get_group(group_info);
if (!status.ok()) {
return status;
}
Status DBMetaImpl::add_group_file(const std::string& group_id,
DateT date,
GroupFileSchema& group_file_info,
GroupFileSchema::FILE_TYPE file_type) {
//PXU TODO
std::stringstream ss;
SimpleIDGenerator g;
std::string suffix = (file_type == GroupFileSchema::RAW) ? ".raw" : ".index";
/* ss << "/tmp/test/" << date */
ss << _options.path << "/" << date
<< "/" << g.getNextIDNumber()
<< suffix;
group_file_info.group_id = "1";
group_file_info.dimension = 64;
group_file_info.location = ss.str();
group_file_info.date = date;
group_file.file_type = GroupFileSchema::NEW;
group_file.file_id = g.getNextIDNumber();
group_file.dimension = group_info.dimension;
GetGroupFilePath(group_file);
try {
auto id = ConnectorPtr->insert(group_file);
std::cout << __func__ << " id=" << id << std::endl;
group_info.id = id;
} catch(std::system_error& e) {
return Status::GroupError("Add GroupFile Group "
+ group_info.group_id + " File " + group_file.file_id + " Error");
}
return Status::OK();
}
Status DBMetaImpl::files_to_index(GroupFilesSchema& files) {
// PXU TODO
files.clear();
std::stringstream ss;
/* ss << "/tmp/test/" << Meta::GetDate(); */
ss << _options.path << "/" << Meta::GetDate();
boost::filesystem::path path(ss.str().c_str());
boost::filesystem::directory_iterator end_itr;
for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) {
/* std::cout << itr->path().string() << std::endl; */
GroupFileSchema f;
f.location = itr->path().string();
std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1);
if (suffixStr == "index") continue;
if (1024*1024*1000 >= GetFileSize(f.location)) continue;
std::cout << "[About to index] " << f.location << std::endl;
f.date = Meta::GetDate();
files.push_back(f);
auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id,
&GroupFileSchema::group_id,
&GroupFileSchema::file_id,
&GroupFileSchema::file_type,
&GroupFileSchema::rows,
&GroupFileSchema::date),
where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_INDEX));
std::map<std::string, GroupSchema> groups;
for (auto& file : selected) {
GroupFileSchema group_file;
group_file.id = std::get<0>(file);
group_file.group_id = std::get<1>(file);
group_file.file_id = std::get<2>(file);
group_file.file_type = std::get<3>(file);
group_file.rows = std::get<4>(file);
group_file.date = std::get<5>(file);
auto groupItr = groups.find(group_file.group_id);
if (groupItr == groups.end()) {
GroupSchema group_info;
group_info.group_id = group_file.group_id;
auto status = get_group(group_info);
if (!status.ok()) {
return status;
}
groups[group_file.group_id] = group_info;
}
group_file.dimension = groups[group_file.group_id].dimension;
files.push_back(group_file);
}
return Status::OK();
}
Status DBMetaImpl::files_to_merge(const std::string& group_id,
DatePartionedGroupFilesSchema& files) {
//PXU TODO
files.clear();
std::stringstream ss;
/* ss << "/tmp/test/" << Meta::GetDate(); */
ss << _options.path << "/" << Meta::GetDate();
boost::filesystem::path path(ss.str().c_str());
boost::filesystem::directory_iterator end_itr;
GroupFilesSchema gfiles;
DateT date = Meta::GetDate();
files[date] = gfiles;
for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) {
/* std::cout << itr->path().string() << std::endl; */
GroupFileSchema f;
f.location = itr->path().string();
std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1);
if (suffixStr == "index") continue;
if (1024*1024*1000 < GetFileSize(f.location)) continue;
std::cout << "About to merge " << f.location << std::endl;
files[date].push_back(f);
auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id,
&GroupFileSchema::group_id,
&GroupFileSchema::file_id,
&GroupFileSchema::file_type,
&GroupFileSchema::rows,
&GroupFileSchema::date));
/* where(is_equal(&GroupFileSchema::file_type, GroupFileSchema::RAW) && */
/* is_equal(&GroupFileSchema::group_id, group_id))); */
GroupSchema group_info;
group_info.group_id = group_id;
auto status = get_group(group_info);
if (!status.ok()) {
return status;
}
for (auto& file : selected) {
GroupFileSchema group_file;
group_file.id = std::get<0>(file);
group_file.group_id = std::get<1>(file);
group_file.file_id = std::get<2>(file);
group_file.file_type = std::get<3>(file);
group_file.rows = std::get<4>(file);
group_file.date = std::get<5>(file);
group_file.dimension = group_info.dimension;
auto dateItr = files.find(group_file.date);
if (dateItr == files.end()) {
files[group_file.date] = GroupFilesSchema();
}
files[group_file.date].push_back(group_file);
}
return Status::OK();
......@@ -203,7 +262,15 @@ Status DBMetaImpl::update_group_file(const GroupFileSchema& group_file_) {
}
// Updates every entry of `files` through ConnectorPtr inside a single
// transaction() call. Returns Status::OK() when transaction() reports
// success, otherwise a DBTransactionError status.
// NOTE(review): exceptions thrown by update() are not caught here — confirm
// how the connector's transaction() handles them.
Status DBMetaImpl::update_files(const GroupFilesSchema& files) {
    //PXU TODO
    auto apply_updates = [&]() {
        for (auto& entry : files) {
            ConnectorPtr->update(entry);
        }
        // Always request a commit once every update call has returned.
        return true;
    };

    auto committed = ConnectorPtr->transaction(apply_updates);
    if (committed) {
        return Status::OK();
    }
    return Status::DBTransactionError("Update files Error");
}
......
......@@ -19,14 +19,7 @@ public:
virtual Status get_group(GroupSchema& group_info_) override;
virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override;
virtual Status add_group_file(const std::string& group_id,
DateT date,
GroupFileSchema& group_file_info,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) override;
virtual Status add_group_file(const std::string& group_id_,
GroupFileSchema& group_file_info_,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) override;
virtual Status add_group_file(GroupFileSchema& group_file_info) override;
virtual Status has_group_file(const std::string& group_id_,
const std::string& file_id_,
......@@ -49,6 +42,9 @@ public:
private:
std::string GetGroupPath(const std::string& group_id);
std::string GetGroupDatePartitionPath(const std::string& group_id, DateT& date);
void GetGroupFilePath(GroupFileSchema& group_file);
Status initialize();
const DBMetaOptions _options;
......
......@@ -133,26 +133,17 @@ Status LocalMetaImpl::has_group(const std::string& group_id, bool& has_or_not) {
return Status::OK();
}
// Convenience overload: adds a group file dated with Meta::GetDate().
//
// group_id        - id of the group the file belongs to.
// group_file_info - in/out record passed on to the dated overload.
// file_type       - type of file to create; forwarded unchanged.
//
// Returns whatever the dated overload returns.
Status LocalMetaImpl::add_group_file(const std::string& group_id,
                                     GroupFileSchema& group_file_info,
                                     GroupFileSchema::FILE_TYPE file_type) {
    // Forward file_type explicitly; omitting it would silently fall back to
    // the dated overload's default argument regardless of what was requested.
    return add_group_file(group_id, Meta::GetDate(), group_file_info, file_type);
}
Status LocalMetaImpl::add_group_file(const std::string& group_id,
DateT date,
GroupFileSchema& group_file_info,
GroupFileSchema::FILE_TYPE file_type) {
Status LocalMetaImpl::add_group_file(GroupFileSchema& group_file_info) {
GroupSchema group_info;
auto status = get_group(group_info);
if (!status.ok()) {
return status;
}
auto location = GetNextGroupFileLocationByPartition(group_id, date, file_type);
group_file_info.group_id = group_id;
group_file_info.dimension = group_info.dimension;
group_file_info.location = location;
group_file_info.date = date;
/* auto status = get_group(group_info); */
/* if (!status.ok()) { */
/* return status; */
/* } */
/* auto location = GetNextGroupFileLocationByPartition(group_id, date, file_type); */
/* group_file_info.group_id = group_id; */
/* group_file_info.dimension = group_info.dimension; */
/* group_file_info.location = location; */
/* group_file_info.date = date; */
return Status::OK();
}
......
......@@ -18,14 +18,7 @@ public:
virtual Status get_group(GroupSchema& group_info_) override;
virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override;
virtual Status add_group_file(const std::string& group_id,
DateT date,
GroupFileSchema& group_file_info,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) override;
virtual Status add_group_file(const std::string& group_id_,
GroupFileSchema& group_file_info_,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) override;
// Overrides the Meta interface method. Declared `override` (not `= 0`) so it
// matches the sibling declarations and keeps this implementation class concrete.
virtual Status add_group_file(GroupFileSchema& group_file_info) override;
virtual Status has_group_file(const std::string& group_id_,
const std::string& file_id_,
......
......@@ -72,7 +72,8 @@ VectorsPtr MemManager::get_mem_by_group(const std::string& group_id) {
}
meta::GroupFileSchema group_file;
auto status = _pMeta->add_group_file(group_id, group_file);
group_file.group_id = group_id;
auto status = _pMeta->add_group_file(group_file);
if (!status.ok()) {
return nullptr;
}
......
......@@ -14,6 +14,7 @@ namespace engine {
namespace meta {
typedef int DateT;
const DateT EmptyDate = -1;
struct GroupSchema {
size_t id;
......@@ -28,6 +29,7 @@ struct GroupFileSchema {
typedef enum {
NEW,
RAW,
TO_INDEX,
INDEX,
TO_DELETE,
} FILE_TYPE;
......@@ -37,7 +39,7 @@ struct GroupFileSchema {
std::string file_id;
int file_type = NEW;
size_t rows;
DateT date;
DateT date = EmptyDate;
uint16_t dimension;
std::string location = "";
}; // GroupFileSchema
......@@ -52,13 +54,7 @@ public:
virtual Status get_group(GroupSchema& group_info) = 0;
virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0;
virtual Status add_group_file(const std::string& group_id_,
GroupFileSchema& group_file_info_,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) = 0;
virtual Status add_group_file(const std::string& group_id,
DateT date,
GroupFileSchema& group_file_info,
GroupFileSchema::FILE_TYPE file_type=GroupFileSchema::RAW) = 0;
virtual Status add_group_file(GroupFileSchema& group_file_info) = 0;
virtual Status has_group_file(const std::string& group_id_,
const std::string& file_id_,
......
......@@ -28,6 +28,9 @@ public:
// Factory for a Status carrying the kGroupError code and the given messages.
static Status GroupError(const std::string& msg, const std::string& msg2="") {
    Status group_error(kGroupError, msg, msg2);
    return group_error;
}
// Factory for a Status carrying the kDBTransactionError code and the given messages.
static Status DBTransactionError(const std::string& msg, const std::string& msg2="") {
    Status transaction_error(kDBTransactionError, msg, msg2);
    return transaction_error;
}
// True when no state is attached to this Status (state_ is null).
bool ok() const {
    return nullptr == state_;
}
......@@ -35,6 +38,7 @@ public:
// Predicates comparing this Status' code() against one specific Code value.
bool IsInvalidDBPath() const {
    return kInvalidDBPath == code();
}
bool IsGroupError() const {
    return kGroupError == code();
}
bool IsDBTransactionError() const {
    return kDBTransactionError == code();
}
std::string ToString() const;
......@@ -47,6 +51,7 @@ private:
kInvalidDBPath,
kGroupError,
kDBTransactionError,
};
Code code() const {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册