提交 8dbaac0f 编写于 作者: Y Yan Chunwei 提交者: GitHub

Merge pull request #40 from Superjom/feature/refactor_storage_interface

......@@ -30,5 +30,6 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/visualdl/backend/logic)
add_executable(vl_test
${PROJECT_SOURCE_DIR}/visualdl/backend/test.cc
${PROJECT_SOURCE_DIR}/visualdl/backend/storage/storage_test.cc
${PROJECT_SOURCE_DIR}/visualdl/backend/logic/im_test.cc)
target_link_libraries(vl_test storage im gtest glog protobuf gflags)
target_link_libraries(vl_test storage im gtest glog protobuf gflags pthread)
......@@ -26,24 +26,21 @@ int ReserviorSample(int num_samples, int num_records) {
}
void InformationMaintainer::SetPersistDest(const std::string &path) {
CHECK(storage_.mutable_data()->dir().empty())
CHECK(storage_->mutable_data()->dir().empty())
<< "duplicate set storage's path";
storage_.mutable_data()->set_dir(path);
storage_->mutable_data()->set_dir(path);
}
storage::Tablet *InformationMaintainer::AddTablet(const std::string &tag,
int num_samples) {
auto *tablet = storage_.Find(tag);
if (!tablet) {
tablet = storage_.Add(tag, num_samples);
}
auto tablet = storage_->NewTablet(tag, num_samples);
return tablet;
}
void InformationMaintainer::AddRecord(const std::string &tag,
const storage::Record &data) {
auto *tablet = storage_.Find(tag);
CHECK(tablet);
auto *tablet = storage_->tablet(tag);
CHECK(tablet) << "no tablet called " << tag;
auto num_records = tablet->total_records();
const auto num_samples = tablet->num_samples();
......@@ -59,9 +56,9 @@ void InformationMaintainer::AddRecord(const std::string &tag,
storage::Record *record;
if (offset >= num_records) {
record = storage_.NewRecord(tag);
record = tablet->add_records();
} else {
record = storage_.GetRecord(tag, offset);
record = tablet->mutable_records(offset);
}
*record = data;
......@@ -76,10 +73,10 @@ void InformationMaintainer::Clear() {
}
void InformationMaintainer::PersistToDisk() {
CHECK(!storage_.data().dir().empty()) << "path of storage should be set";
CHECK(!storage_->data().dir().empty()) << "path of storage should be set";
// TODO make dir first
// MakeDir(storage_.data().dir());
storage_.Save(storage_.data().dir() + "/storage.pb");
storage_->PersistToDisk();
}
} // namespace visualdl
#ifndef VISUALDL_BACKEND_LOGIC_IM_H
#define VISUALDL_BACKEND_LOGIC_IM_H
#include <glog/logging.h>
#include <memory>
#include <string>
#include "visualdl/backend/storage/storage.h"
......@@ -13,7 +15,15 @@ namespace visualdl {
*/
class InformationMaintainer final {
public:
InformationMaintainer() {}
InformationMaintainer(StorageBase::Type type = StorageBase::Type::kMemory) {
switch (type) {
case StorageBase::Type::kMemory:
storage_.reset(new MemoryStorage);
break;
default:
CHECK(false) << "Unsupported storage kind " << type;
}
}
static InformationMaintainer &Global() {
static InformationMaintainer *x = new InformationMaintainer();
......@@ -46,10 +56,10 @@ public:
*/
void PersistToDisk();
Storage &storage() { return storage_; }
StorageBase &storage() { return *storage_; }
private:
Storage storage_;
std::unique_ptr<StorageBase> storage_;
};
} // namespace visualdl
......
......@@ -11,19 +11,25 @@ protected:
InformationMaintainer &im = InformationMaintainer::Global();
};
TEST_F(ImTester, AddTablet) { im.AddTablet("tag0", 20); }
TEST_F(ImTester, AddTablet) {
im.Clear();
im.AddTablet("tag0", 20);
}
TEST_F(ImTester, AddRecord) {
storage::Record rcd;
rcd.set_dtype(storage::DataType::kInt32s);
im.Clear();
im.AddTablet("tag0", 20);
for (int i = 0; i < 100; i++) {
storage::Record rcd;
rcd.set_dtype(storage::DataType::kInt32s);
for (int j = 0; j < 10; j++) {
rcd.add_data()->add_i32s(i * 20 + j);
}
im.AddRecord("tag0", rcd);
}
ASSERT_EQ(im.storage().Find("tag0")->records_size(), 20UL);
ASSERT_EQ(im.storage().tablet("tag0")->records_size(), 100UL);
}
} // namespace visualdl
......@@ -98,7 +98,7 @@ public:
InformationMaintainer::Global().storage().mutable_data());
}
TabletHelper tablet(const std::string &tag) {
return TabletHelper(InformationMaintainer::Global().storage().Find(tag));
return TabletHelper(InformationMaintainer::Global().storage().tablet(tag));
}
TabletHelper AddTablet(const std::string &tag, int num_samples) {
return TabletHelper(
......
......@@ -2,62 +2,69 @@
#include <fstream>
#include "visualdl/backend/storage/storage.h"
#include "visualdl/backend/utils/filesystem.h"
namespace visualdl {
storage::Tablet *Storage::Add(const std::string &tag, int num_samples) {
auto *tablet = &(*proto_.mutable_tablets())[tag];
tablet->set_num_samples(num_samples);
return tablet;
std::string GenPathFromTag(const std::string &dir, const std::string &tag) {
return dir + "/" + tag;
}
storage::Tablet *Storage::Find(const std::string &tag) {
auto it = proto_.mutable_tablets()->find(tag);
if (it != proto_.tablets().end()) {
const std::string StorageBase::meta_file_name = "storage.meta";
storage::Tablet *MemoryStorage::NewTablet(const std::string &tag,
int num_samples) {
auto it = tablets_.find(tag);
if (it == tablets_.end()) {
// create new tablet
tablets_[tag] = storage::Tablet();
tablets_[tag].set_tag(tag);
*storage_.add_tags() = tag;
} else {
return &it->second;
}
return nullptr;
}
storage::Record *Storage::NewRecord(const std::string &tag) {
auto *tablet = Find(tag);
CHECK(tablet) << "Tablet" << tag << " should be create first";
auto *record = tablet->mutable_records()->Add();
// increase num_records
int num_records = tablet->total_records();
tablet->set_total_records(num_records + 1);
return record;
return &tablets_[tag];
}
storage::Record *Storage::GetRecord(const std::string &tag, int offset) {
auto *tablet = Find(tag);
CHECK(tablet) << "Tablet" << tag << " should be create first";
auto num_records = tablet->total_records();
CHECK_LT(offset, num_records) << "invalid offset";
return tablet->mutable_records()->Mutable(offset);
/*
 * Return the tablet registered under `tag`. The lookup uses find(), so it
 * never inserts a new entry; an unknown tag yields a failed CHECK.
 */
storage::Tablet *MemoryStorage::tablet(const std::string &tag) {
  auto it = tablets_.find(tag);
  // The condition text is part of the fatal log message, keep it as is.
  CHECK(it != tablets_.end()) << "tablet tagged as " << tag << " not exists";
  storage::Tablet &found = it->second;
  return &found;
}
void Storage::Save(const std::string &path) const {
std::ofstream file(path, file.binary | file.out);
CHECK(file.is_open()) << "can't open path " << path;
auto str = Serialize();
file.write(str.c_str(), str.size());
}
void Storage::Load(const std::string &path) {
std::ifstream file(path, file.binary);
CHECK(file.is_open()) << "can't open path " << path;
size_t size = file.tellg();
std::string buffer(size, ' ');
file.seekg(0);
file.read(&buffer[0], size);
DeSerialize(buffer);
/*
 * Persist the storage to disk under storage_.dir():
 *   - the storage meta message goes to "<dir>/<meta_file_name>";
 *   - every tag listed in the meta is written to the path produced by
 *     GenPathFromTag(dir, tag).
 * A failed CHECK is raised when the directory has not been set or when a
 * listed tag has no tablet in memory.
 */
void MemoryStorage::PersistToDisk() const {
  // Validate the destination before touching the filesystem, so an unset
  // directory never reaches mkdir().
  CHECK(!storage_.dir().empty()) << "storage's dir should be set first";
  const std::string &dir = storage_.dir();
  VLOG(3) << "persist storage to disk path " << dir;
  // make a directory if not exist
  fs::TryMkdir(dir);
  // write storage out
  const auto meta_path = dir + "/" + meta_file_name;
  fs::Write(meta_path, fs::Serialize(storage_));
  // write all the tablets; iterate by reference to avoid copying each tag
  for (const auto &tag : storage_.tags()) {
    const auto path = GenPathFromTag(dir, tag);
    const auto it = tablets_.find(tag);
    CHECK(it != tablets_.end()) << "no tablet stored for tag " << tag;
    fs::Write(path, fs::Serialize(it->second));
  }
}
std::string Storage::Serialize() const { return proto_.SerializeAsString(); }
void MemoryStorage::LoadFromDisk(const std::string &dir) {
VLOG(3) << "load storage from disk path " << dir;
CHECK(!dir.empty()) << "dir is empty";
// load storage
const auto meta_path = dir + "/" + meta_file_name;
auto buf = fs::Read(meta_path);
CHECK(fs::DeSerialize(&storage_, buf))
<< "failed to parse protobuf loaded from " << meta_path;
void Storage::DeSerialize(const std::string &data) {
proto_.ParseFromString(data);
// load all the tablets
for (int i = 0; i < storage_.tags_size(); i++) {
std::string tag = storage_.tags(i);
auto path = GenPathFromTag(storage_.dir(), tag);
CHECK(tablets_[tag].ParseFromString(fs::Read(path)))
<< "failed to parse protobuf text loaded from " << path;
}
}
} // namespace visualdl
......@@ -2,68 +2,84 @@
#define VISUALDL_STORAGE_H
#include <time.h>
#include <map>
#include <string>
#include "visualdl/backend/storage/storage.pb.h"
namespace visualdl {
class Storage final {
/*
 * Generate a tablet path in disk from its tag.
 *
 * Deliberately not declared `inline`: this header carries no definition, and
 * an inline function must be defined in every translation unit that uses it.
 */
std::string GenPathFromTag(const std::string &dir, const std::string &tag);
/*
* Storage interface. There might be a bunch of implementations, for example, a
* MemStorage that keeps a copy of all the tablets in memory and can be changed
* with higher performance; a DiskStorage that keeps all the data on disk and
* applies to the scenarios where memory consumption should be considered.
*/
class StorageBase {
public:
Storage() {
// set time stamp
time_t time0;
time(&time0);
proto_.set_timestamp(time0);
const static std::string meta_file_name;
enum Type { kMemory = 0, kDisk = 1 };
void SetStorage(const std::string &dir) {
time_t t;
time(&t);
storage_.set_timestamp(t);
storage_.set_dir(dir);
}
/*
* Add a new tablet named `tag`, the newly added instance will be returned.
* Create a new Tablet storage.
*/
storage::Tablet *Add(const std::string &tag, int num_samples);
virtual storage::Tablet *NewTablet(const std::string &tag,
int num_samples) = 0;
/*
* Search the tablet named `tag`, if not exist, return nullptr.
* Get a tablet from memory, this can be viewed as a cache, if the storage is
* in disk, a hash map in memory will first load the corresponding Tablet
* Protobuf from disk and hold all the changes.
*/
storage::Tablet *Find(const std::string &tag);
virtual storage::Tablet *tablet(const std::string &tag) = 0;
/*
* Append a new record to the tail of tablet.
* Persist the data from cache to disk. Both the memory storage and the disk
* storage should write changes to disk for persistence.
*/
storage::Record *NewRecord(const std::string &tag);
virtual void PersistToDisk() const = 0;
/*
* Get a record at `offset`, if the offset is not valid, yield a failed CHECK.
* Load data from disk.
*/
storage::Record *GetRecord(const std::string &tag, int offset);
virtual void LoadFromDisk(const std::string &dir) = 0;
/*
* Serialize this object to string and save it to a file.
*/
void Save(const std::string &path) const;
storage::Storage *mutable_data() { return &storage_; }
const storage::Storage &data() { return storage_; }
/*
* Load the Protobuf message from a file.
*/
void Load(const std::string &path);
protected:
storage::Storage storage_;
};
storage::Storage *mutable_data() { return &proto_; }
/*
* Storage in Memory, that will support quick edits on data.
*/
class MemoryStorage final : public StorageBase {
public:
storage::Tablet *NewTablet(const std::string &tag, int num_samples) override;
const storage::Storage &data() { return proto_; }
storage::Tablet *tablet(const std::string &tag) override;
protected:
/*
* Serialize the Storage instance to string.
*/
std::string Serialize() const;
void PersistToDisk() const override;
/*
* De-serialize from a string and update this Storage instance.
*/
void DeSerialize(const std::string &data);
void LoadFromDisk(const std::string &dir) override;
private:
storage::Storage proto_;
std::map<std::string, storage::Tablet> tablets_;
};
} // namespace visualdl
......
......@@ -130,7 +130,9 @@ The Storage stores all the records.
*/
message Storage {
// tags to Tablet, should be thread safe if fix the keys after initialization.
// TODO to delete in the new storage interface.
map<string, Tablet> tablets = 1;
repeated string tags = 4;
string dir = 2;
int64 timestamp = 3;
}
\ No newline at end of file
}
#include "visualdl/backend/storage/storage.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
namespace visualdl {
using namespace std;
class MemoryStorageTest : public ::testing::Test {
public:
void SetUp() override { storage_.SetStorage("./tmp"); }
MemoryStorage storage_;
};
// SetStorage() must record the directory it was given in the storage meta.
TEST_F(MemoryStorageTest, SetStorage) {
  const std::string dir{"./tmp"};
  storage_.SetStorage(dir);
  ASSERT_EQ(storage_.data().dir(), dir);
}
// A tablet created through NewTablet() must be retrievable by its tag and
// must carry that tag.
TEST_F(MemoryStorageTest, AddTablet) {
  // TODO need to escape tag as name
  const std::string tag{"add%20tag0"};
  storage_.NewTablet(tag, -1);

  storage::Tablet* tablet = storage_.tablet(tag);
  ASSERT_TRUE(tablet != nullptr);
  ASSERT_EQ(tablet->tag(), tag);
}
// Round trip: persist a storage holding one tablet, load it into a fresh
// instance and compare the serialized storage meta of both.
TEST_F(MemoryStorageTest, PersistToDisk) {
  const std::string tag{"add%20tag0"};
  storage_.NewTablet(tag, -1);
  storage_.PersistToDisk();

  MemoryStorage other;
  other.LoadFromDisk("./tmp");

  ASSERT_EQ(other.data().SerializeAsString(),
            storage_.data().SerializeAsString());
}
} // namespace visualdl
#ifndef VISUALDL_BACKEND_UTILS_FILESYSTEM_H
#define VISUALDL_BACKEND_UTILS_FILESYSTEM_H
#include <google/protobuf/text_format.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
namespace visualdl {
namespace fs {
/*
 * Serialize a protobuf message into a string.
 *
 * With `human_readable` set, the message is printed through
 * google::protobuf::TextFormat::PrintToString; otherwise the result of
 * proto.SerializeAsString() is returned.
 */
template <typename T>
std::string Serialize(const T& proto, bool human_readable = false) {
  if (!human_readable) {
    return proto.SerializeAsString();
  }
  std::string text;
  google::protobuf::TextFormat::PrintToString(proto, &text);
  return text;
}
/*
 * Parse `buf` into the protobuf message `proto`.
 *
 * With `human_readable` set, `buf` is parsed through
 * google::protobuf::TextFormat::ParseFromString; otherwise through
 * proto->ParseFromString(buf). Returns the parser's success flag.
 *
 * `buf` is taken by const reference so that the serialized payload is not
 * copied on every call.
 */
template <typename T>
bool DeSerialize(T* proto,
                 const std::string& buf,
                 bool human_readable = false) {
  // NOTE(review): the original author marked the human_readable path as
  // "not valid"; treat it as unverified until confirmed.
  if (human_readable) {
    return google::protobuf::TextFormat::ParseFromString(buf, proto);
  }
  return proto->ParseFromString(buf);
}
void TryMkdir(const std::string& dir) {
VLOG(1) << "try to mkdir " << dir;
struct stat st = {0};
if (stat(dir.c_str(), &st) == -1) {
::mkdir(dir.c_str(), 0700);
}
}
/*
 * Write `buffer` to the file at `path`, opened with `open_mode` (binary by
 * default).
 *
 * A failed CHECK is raised when the file cannot be opened, or when writing or
 * closing it leaves the stream in a failed state, so a truncated file is
 * never reported as a success.
 */
inline void Write(const std::string& path,
                  const std::string& buffer,
                  std::ios::openmode open_mode = std::ios::binary) {
  VLOG(1) << "write to path " << path;
  std::ofstream file(path, open_mode);
  CHECK(file.is_open()) << "failed to open " << path;
  file.write(buffer.data(), static_cast<std::streamsize>(buffer.size()));
  // close() flushes; it sets failbit when the flush or the close fails.
  file.close();
  CHECK(!file.fail()) << "failed to write " << path;
}
/*
 * Read the whole file at `path` into a string, opened with `open_mode`
 * (binary by default).
 *
 * The file is opened positioned at its end so that tellg() reports its size.
 * A failed CHECK is raised when the file cannot be opened, when its size
 * cannot be determined, or when the stream hits an unrecoverable error while
 * reading. The returned string holds exactly the bytes that were read.
 */
inline std::string Read(const std::string& path,
                        std::ios::openmode open_mode = std::ios::binary) {
  VLOG(1) << "read from path " << path;
  std::ifstream file(path, open_mode | std::ios::ate);
  CHECK(file.is_open()) << "failed to open " << path;
  // tellg() returns -1 on failure; check it before it is used as a size.
  const std::streamoff size = file.tellg();
  CHECK(size >= 0) << "failed to get size of " << path;
  std::string buffer(static_cast<size_t>(size), '\0');
  if (size > 0) {
    file.seekg(0);
    file.read(&buffer[0], size);
    CHECK(!file.bad()) << "failed to read " << path;
    // Keep only what was actually read in case of a short read.
    buffer.resize(static_cast<size_t>(file.gcount()));
  }
  return buffer;
}
} // namespace fs
} // namespace visualdl
#endif // VISUALDL_BACKEND_UTILS_FILESYSTEM_H
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册