diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index e2246c95cd0ceee231852511abf164ecfda276a6..b92518d6961f3ba2abcfa7b53a3f354aff0b5923 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -124,8 +124,15 @@ Status DBImpl::merge_files(const std::string& group_id, const meta::DateT& date, updated.push_back(file_schema); } + auto index_size = group_file.dimension * index->ntotal; faiss::write_index(index.get(), group_file.location.c_str()); - group_file.file_type = meta::GroupFileSchema::RAW; + + std::cout << "Merged size=" << index_size << std::endl; + if (index_size >= _options.index_trigger_size) { + group_file.file_type = meta::GroupFileSchema::TO_INDEX; + } else { + group_file.file_type = meta::GroupFileSchema::RAW; + } updated.push_back(group_file); status = _pMeta->update_files(updated); diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index ed0382b1619b467db99a80b2f89e065bca090c5f..7a0bef88a3fda5847c5dd3af3c0034282ae6c75c 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -33,7 +33,7 @@ inline auto StoragePrototype(const std::string& path) { } -using ConnectorT = decltype(StoragePrototype("")); +using ConnectorT = decltype(StoragePrototype("/tmp/dummy.sqlite3")); static std::unique_ptr ConnectorPtr; long GetFileSize(const std::string& filename) @@ -77,6 +77,8 @@ Status DBMetaImpl::initialize() { ConnectorPtr->sync_schema(); + cleanup(); + return Status::OK(); } @@ -211,6 +213,7 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { group_file.file_type = std::get<3>(file); group_file.rows = std::get<4>(file); group_file.date = std::get<5>(file); + GetGroupFilePath(group_file); auto groupItr = groups.find(group_file.group_id); if (groupItr == groups.end()) { GroupSchema group_info; @@ -316,6 +319,42 @@ Status DBMetaImpl::update_files(const GroupFilesSchema& files) { return Status::OK(); } +Status DBMetaImpl::cleanup() { + std::unique_lock lk(mutex_); + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::group_id, + &GroupFileSchema::file_id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows, + &GroupFileSchema::date), + where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE or + c(&GroupFileSchema::file_type) == (int)GroupFileSchema::NEW)); + + GroupFilesSchema updated; + + for (auto& file : selected) { + GroupFileSchema group_file; + group_file.id = std::get<0>(file); + group_file.group_id = std::get<1>(file); + group_file.file_id = std::get<2>(file); + group_file.file_type = std::get<3>(file); + group_file.rows = std::get<4>(file); + group_file.date = std::get<5>(file); + GetGroupFilePath(group_file); + if (group_file.file_type == GroupFileSchema::TO_DELETE) { + boost::filesystem::remove(group_file.location); + } + ConnectorPtr->remove(group_file.id); + std::cout << "Removing id=" << group_file.id << " location=" << group_file.location << std::endl; + } + + return Status::OK(); +} + +DBMetaImpl::~DBMetaImpl() { + cleanup(); +} + } // namespace meta } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 3011c700f12709f96cb2e16b0681ca385145b496..5fa10e986e8c94d68766b77acfff4a449cd792a0 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -42,6 +42,10 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status cleanup() override; + + virtual ~DBMetaImpl(); + private: Status get_group_no_lock(GroupSchema& group_info); diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index d39b2fee5a777a21ed69471f45bb1faba9a650f3..7bdb5076471a2eada747f771dfdfc562524088e1 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ b/cpp/src/db/LocalMetaImpl.cpp @@ -236,6 +236,11 @@ Status LocalMetaImpl::update_files(const GroupFilesSchema& files) { return Status::OK(); } +Status LocalMetaImpl::cleanup() { + //PXU TODO + return Status::OK(); +} + } // namespace meta } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index bd64970cda08358a623fe7c47e86520be2bb54dd..f74916c0878e6e8c0a3e3b0acad49ee156061e73 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -34,6 +34,8 @@ public: virtual Status update_files(const GroupFilesSchema& files) override; + virtual Status cleanup() override; + virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) override; diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 510cfc837267099b1e9db2038dddce217e5ce6f7..84ba414708248fae3b43146f33180c08f40f2a7d 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -75,6 +75,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) = 0; + virtual Status cleanup() = 0; + static DateT GetDate(const std::time_t& t); static DateT GetDate(); diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index fd64f20e5df95a680402102a391912734602b2a1..1af20e42a6ac4644e923f10d254d9b999d5e6f59 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -20,7 +20,7 @@ struct Options { Options(); uint16_t memory_sync_interval = 10; uint16_t merge_trigger_number = 100; - size_t index_trigger_size = 1024*1024*1024; + size_t index_trigger_size = 1024*1024*256; Env* env; DBMetaOptions meta; }; // Options