From 969fd9281d3f5922ca52fedb305fbe4a39ee43d8 Mon Sep 17 00:00:00 2001 From: superjom Date: Fri, 17 Nov 2017 16:49:40 +0800 Subject: [PATCH] init information maintainer --- CMakeLists.txt | 14 ++++- visualdl/backend/logic/im.cc | 74 ++++++++++++++++++++++++++ visualdl/backend/logic/im.h | 55 +++++++++++++++++++ visualdl/backend/logic/im_test.cc | 30 +++++++++++ visualdl/backend/storage/storage.cc | 34 +++++++++--- visualdl/backend/storage/storage.h | 44 ++++++++------- visualdl/backend/storage/storage.proto | 2 + 7 files changed, 224 insertions(+), 29 deletions(-) create mode 100644 visualdl/backend/logic/im.cc create mode 100644 visualdl/backend/logic/im.h create mode 100644 visualdl/backend/logic/im_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index bb45c59e..f944d7d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,9 +5,10 @@ set(CMAKE_CXX_STANDARD 11) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(thirdparty/local/include) -link_directories(thirdparty/local/lib) add_subdirectory(thirdparty/pybind11-2.2.1) +link_directories(thirdparty/local/lib) + set(SOURCE_FILES visualdl/backend/storage/storage.cc visualdl/backend/storage/storage.h visualdl/backend/storage/storage.pb.h @@ -16,4 +17,13 @@ set(SOURCE_FILES add_library(storage visualdl/backend/storage/storage.cc visualdl/backend/storage/storage.pb.cc) -add_executable(VisualDL ${SOURCE_FILES}) \ No newline at end of file +add_library(c_api visualdl/backend/logic/c_api.cc) +add_library(sdk visualdl/backend/logic/sdk.cc) +add_library(im visualdl/backend/logic/im.cc) + +add_executable(vl_test + visualdl/backend/test.cc + visualdl/backend/logic/im_test.cc) +target_link_libraries(vl_test storage im gtest glog protobuf gflags) +add_executable(VisualDL ${SOURCE_FILES}) + diff --git a/visualdl/backend/logic/im.cc b/visualdl/backend/logic/im.cc new file mode 100644 index 00000000..5f567daa --- /dev/null +++ b/visualdl/backend/logic/im.cc @@ -0,0 +1,74 @@ +#include +#include + +#include
"visualdl/backend/logic/im.h" + +namespace visualdl { + +/* + * @num_samples: number of instances to sample + * @num_records: counter of the records. + * @returns: id of the instance to replace, or -1 if this instance is dropped. + */ +int ReserviorSample(int num_samples, int num_records) { + if (num_records <= num_samples) { + return num_records; + } + + std::srand(std::time(0)); + float prob = static_cast(std::rand()) / RAND_MAX; + float receive_prob = static_cast(num_samples) / num_records; + if (prob < receive_prob) { + int offset2replace = std::rand() % num_samples; + return offset2replace; + } + return -1; +} + +void InformationMaintainer::SetPersistDest(const std::string &path) { + CHECK(storage_.mutable_data()->dir().empty()) + << "duplicate set storage's path"; + storage_.mutable_data()->set_dir(path); +} + +storage::Tablet *InformationMaintainer::AddTablet(const std::string &tag, + int num_samples) { + auto *tablet = storage_.Find(tag); + if (!tablet) { + tablet = storage_.Add(tag, num_samples); + } + return tablet; +} + +void InformationMaintainer::AddRecord(const std::string &tag, + storage::Tablet::Type type, + const std::string &data, + storage::DataType dtype) { + auto *tablet = storage_.Find(tag); + CHECK(tablet); + + auto num_records = tablet->num_records(); + const auto num_samples = tablet->num_samples(); + const auto offset = ReserviorSample(num_samples, num_records + 1); + if (offset < 0) + return; + + storage::Record *record; + if (offset >= num_records) { + record = storage_.NewRecord(tag); + } else { + record = storage_.GetRecord(tag, offset); + } + + record->set_dtype(dtype); + record->ParseFromString(data); + + tablet->set_num_records(num_records + 1); +} + +void InformationMaintainer::PersistToDisk() { + CHECK(!storage_.data().dir().empty()) << "path of storage should be set"; + storage_.Save(storage_.data().dir()); +} + +} // namespace visualdl diff --git a/visualdl/backend/logic/im.h b/visualdl/backend/logic/im.h new file mode 100644 index
00000000..ce85042d --- /dev/null +++ b/visualdl/backend/logic/im.h @@ -0,0 +1,55 @@ +#ifndef VISUALDL_BACKEND_LOGIC_IM_H +#define VISUALDL_BACKEND_LOGIC_IM_H + +#include + +#include "visualdl/backend/storage/storage.h" + +namespace visualdl { + +/* + * Maintain the Storage singleton in memory, pre-compute some statistical + * information to help visualization. + */ +class InformationMaintainer final { +public: + InformationMaintainer() {} + + static InformationMaintainer &Global() { + static InformationMaintainer *x = new InformationMaintainer(); + return *x; + } + + /* + * Set the disk path to store the Storage object. + */ + void SetPersistDest(const std::string &path); + + storage::Tablet *AddTablet(const std::string &tag, int num_samples); + + /* + * @tag: tag of the target Tablet. + * @type: type of target Tablet. + * @data: serialized protobuf message. + * + * NOTE: passing in the serialized protobuf message triggers copying, but + * makes it simpler to support different Tablet data formats. + */ + void AddRecord(const std::string &tag, storage::Tablet::Type type, + const std::string &data, + storage::DataType dtype = storage::DataType::kUnknown); + + /* + * Save the Storage Protobuf to disk.
+ */ + void PersistToDisk(); + + Storage &storage() { return storage_; } + +private: + Storage storage_; +}; + +} // namespace visualdl + +#endif // VISUALDL_BACKEND_LOGIC_IM_H diff --git a/visualdl/backend/logic/im_test.cc b/visualdl/backend/logic/im_test.cc new file mode 100644 index 00000000..83bd9003 --- /dev/null +++ b/visualdl/backend/logic/im_test.cc @@ -0,0 +1,30 @@ +#include "visualdl/backend/logic/im.h" + +#include "gtest/gtest.h" + +namespace visualdl { + +class ImTester : public ::testing::Test { +protected: + void SetUp() override {} + + InformationMaintainer &im = InformationMaintainer::Global(); +}; + +TEST_F(ImTester, AddTablet) { im.AddTablet("tag0", 20); } + +TEST_F(ImTester, AddRecord) { + storage::Record rcd; + rcd.set_dtype(storage::DataType::kInt32s); + for (int i = 0; i < 100; i++) { + for (int j = 0; j < 10; j++) { + rcd.mutable_data()->add_i32s(i * 20 + j); + } + im.AddRecord("tag0", storage::Tablet::Type::Tablet_Type_kGraph, + rcd.SerializeAsString()); + } + + ASSERT_EQ(im.storage().Find("tag0")->records_size(), 20UL); +} + +} // namespace visualdl diff --git a/visualdl/backend/storage/storage.cc b/visualdl/backend/storage/storage.cc index 5af049d9..5a89aa49 100644 --- a/visualdl/backend/storage/storage.cc +++ b/visualdl/backend/storage/storage.cc @@ -5,18 +5,38 @@ namespace visualdl { -storage::Tablet *Storage::Add(const std::string &tag) { - return &proto_.mutable_tablets()->at(tag); +storage::Tablet *Storage::Add(const std::string &tag, int num_samples) { + auto *tablet = &(*proto_.mutable_tablets())[tag]; + tablet->set_num_samples(num_samples); + return tablet; } -const storage::Tablet *Storage::Find(const std::string &tag) const { - auto it = proto_.tablets().find(tag); +storage::Tablet *Storage::Find(const std::string &tag) { + auto it = proto_.mutable_tablets()->find(tag); if (it != proto_.tablets().end()) { return &it->second; } return nullptr; } +storage::Record *Storage::NewRecord(const std::string &tag) { + auto *tablet = 
Find(tag); + CHECK(tablet) << "Tablet" << tag << " should be create first"; + auto *record = tablet->mutable_records()->Add(); + // increase num_records + int num_records = tablet->num_records(); + tablet->set_num_records(num_records + 1); + return record; +} +storage::Record *Storage::GetRecord(const std::string &tag, int offset) { + auto *tablet = Find(tag); + CHECK(tablet) << "Tablet" << tag << " should be create first"; + + auto num_records = tablet->num_records(); + CHECK_LT(offset, num_records) << "invalid offset"; + return tablet->mutable_records()->Mutable(offset); +} + void Storage::Save(const std::string &path) const { std::ofstream file(path, file.binary | file.out); CHECK(file.is_open()) << "can't open path " << path; @@ -34,12 +54,10 @@ void Storage::Load(const std::string &path) { DeSerialize(buffer); } -std::string Storage::Serialize() const { - return proto_.SerializeAsString(); -} +std::string Storage::Serialize() const { return proto_.SerializeAsString(); } void Storage::DeSerialize(const std::string &data) { proto_.ParseFromString(data); } -} // namespace visualdl +} // namespace visualdl diff --git a/visualdl/backend/storage/storage.h b/visualdl/backend/storage/storage.h index aec08a87..a623eb0f 100644 --- a/visualdl/backend/storage/storage.h +++ b/visualdl/backend/storage/storage.h @@ -9,24 +9,33 @@ namespace visualdl { class Storage final { - public: - /* - * There should be only one Storage instance in memory. - */ - Storage &Global() { - static Storage *instance = new Storage(); - return *instance; +public: + Storage() { + // set time stamp + time_t time0; + time(&time0); + proto_.set_timestamp(time0); } /* * Add a new tablet named `tag`, the newly added instance will be returned. */ - storage::Tablet *Add(const std::string &tag); + storage::Tablet *Add(const std::string &tag, int num_samples); /* * Search the tablet named `tag`, if not exist, return nullptr. 
*/ - const storage::Tablet *Find(const std::string &tag) const; + storage::Tablet *Find(const std::string &tag); + + /* + * Append a new record to the tail of tablet. + */ + storage::Record *NewRecord(const std::string &tag); + + /* + * Get a record at `offset`, if the offset is not valid, yield a failed CHECK. + */ + storage::Record *GetRecord(const std::string &tag, int offset); /* * Serialize this object to string and save it to a file. @@ -38,7 +47,11 @@ class Storage final { */ void Load(const std::string &path); - protected: + storage::Storage *mutable_data() { return &proto_; } + + const storage::Storage &data() { return proto_; } + +protected: /* * Serialize the Storage instance to string. */ @@ -49,17 +62,10 @@ class Storage final { */ void DeSerialize(const std::string &data); - Storage() { - // set time stamp - time_t time0; - time(&time0); - proto_.set_timestamp(time0); - } - - private: +private: storage::Storage proto_; }; } // namespace visualdl -#endif //VISUALDL_STORAGE_H +#endif // VISUALDL_STORAGE_H diff --git a/visualdl/backend/storage/storage.proto b/visualdl/backend/storage/storage.proto index 3cc1aa0c..2b5729f8 100644 --- a/visualdl/backend/storage/storage.proto +++ b/visualdl/backend/storage/storage.proto @@ -16,6 +16,8 @@ enum DataType { kStrings = 9; kInt32s = 10; kBools = 11; + + kUnknown = 12; } // A data array, which type is `type`. -- GitLab