未验证 提交 eb18cf33 编写于 作者: Y Yan Chunwei 提交者: GitHub

feature/enhance image storage (#120)

上级 41b4f4b0
......@@ -51,6 +51,7 @@ add_executable(vl_test
${PROJECT_SOURCE_DIR}/visualdl/logic/sdk_test.cc
${PROJECT_SOURCE_DIR}/visualdl/logic/histogram_test.cc
${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc
${PROJECT_SOURCE_DIR}/visualdl/storage/test_binary_record.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/test_concurrency.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/test_image.cc
${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h
......
#include "visualdl/logic/sdk.h"
#include "visualdl/logic/histogram.h"
#include "visualdl/storage/binary_record.h"
#include "visualdl/utils/image.h"
#include "visualdl/utils/macro.h"
namespace visualdl {
// Global log dir: a hack to pass the log directory across all the components.
// One process of the VDL backend can only process a single logdir, so this
// is OK.
std::string g_log_dir;
// Returns a copy of this writer whose mode is `mode`. The copy is taken
// first, then `mode` is added to this writer's storage_.
LogWriter LogWriter::AsMode(const std::string& mode) {
  LogWriter mode_writer(*this);
  storage_.AddMode(mode);
  mode_writer.mode_ = mode;
  return mode_writer;
}
// Adds a tablet to storage_ under the encoded form of "<mode_>/<tag>", then
// sets its captions to {mode_} and its tag to (mode_, tag).
Tablet LogWriter::AddTablet(const std::string& tag) {
  // TODO(ChunweiYan) add string check here.
  std::string encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);

  auto tablet = storage_.AddTablet(encoded_tag);
  tablet.SetCaptions(std::vector<std::string>(1, mode_));
  tablet.SetTag(mode_, tag);
  return tablet;
}
// Constructs the underlying reader from `dir` and records `dir` in the
// global g_log_dir.
LogReader::LogReader(const std::string& dir)
    : reader_(dir) {
  g_log_dir = dir;
}
// Returns a copy of this reader with its mode replaced by `mode`; this
// reader is left untouched.
LogReader LogReader::AsMode(const std::string& mode) {
  LogReader mode_reader(*this);
  mode_reader.mode_ = mode;
  return mode_reader;
}
// Looks up the tablet stored under the encoded form of "<mode_>/<tag>".
TabletReader LogReader::tablet(const std::string& tag) {
  std::string encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);
  return reader_.tablet(encoded_tag);
}
// Returns every tag known to the reader that matches the current mode_
// (see TagMatchMode). Tags are returned as stored, i.e. not decoded.
std::vector<std::string> LogReader::all_tags() {
  auto tags = reader_.all_tags();
  // Erase-remove idiom: remove_if compacts the kept tags to the front and
  // returns the start of the leftover tail, which must be erased up to end().
  // Erasing only `it + 1` would keep stale entries and is undefined behaviour
  // when nothing (or only the last element) was removed.
  const auto first_removed =
      std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
        return !TagMatchMode(tag, mode_);
      });
  tags.erase(first_removed, tags.end());
  return tags;
}
// Returns the readable tags (mode prefix stripped, see GenReadableTag) of all
// records of the given component type that match the current mode_.
// CHECK-fails when the component has no tags at all.
std::vector<std::string> LogReader::tags(const std::string& component) {
  auto tablet_type = Tablet::type(component);
  auto typed_tags = reader_.tags(tablet_type);
  CHECK(!typed_tags.empty()) << "component " << component << " has no taged records";

  std::vector<std::string> readable;
  for (const auto& raw_tag : typed_tags) {
    if (!TagMatchMode(raw_tag, mode_)) continue;
    readable.push_back(GenReadableTag(mode_, raw_tag));
  }
  return readable;
}
// Decodes `tag` and drops its leading "<mode>/" prefix.
std::string LogReader::GenReadableTag(const std::string& mode,
                                      const std::string& tag) {
  std::string decoded(tag);
  string::TagDecode(decoded);
  const auto prefix_len = mode.size() + 1;  // the mode plus the `/`
  return decoded.substr(prefix_len);
}
// True when `tag` is strictly longer than `mode` and starts with `mode`.
bool LogReader::TagMatchMode(const std::string& tag, const std::string& mode) {
  const bool tag_is_longer = tag.size() > mode.size();
  return tag_is_longer && tag.compare(0, mode.size(), mode) == 0;
}
namespace components {
template <typename T>
......@@ -103,8 +175,10 @@ void Image::SetSample(int index,
new_shape.emplace_back(1);
}
// production
int size = std::accumulate(
new_shape.begin(), new_shape.end(), 1., [](int a, int b) { return a * b; });
int size =
std::accumulate(new_shape.begin(), new_shape.end(), 1., [](int a, int b) {
return a * b;
});
CHECK_GT(size, 0);
CHECK_LE(new_shape.size(), 3)
<< "shape should be something like (width, height, num_channel)";
......@@ -114,7 +188,6 @@ void Image::SetSample(int index,
CHECK_LT(index, num_samples_);
CHECK_LE(index, num_records_);
auto entry = step_.MutableData<std::vector<byte_t>>(index);
// trick to store int8 to protobuf
std::vector<byte_t> data_str(data.size());
for (int i = 0; i < data.size(); i++) {
......@@ -122,9 +195,14 @@ void Image::SetSample(int index,
}
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);
// entry.SetRaw(std::string(data_str.begin(), data_str.end()));
entry.SetRaw(
BinaryRecord brcd(
GenBinaryRecordDir(step_.parent()->dir()),
std::string(image.data(), image.data() + image.rows() * image.cols()));
brcd.tofile();
auto entry = step_.MutableData<std::vector<byte_t>>(index);
entry.SetRaw(brcd.hash());
static_assert(
!is_same_type<value_t, shape_t>::value,
......@@ -132,12 +210,6 @@ void Image::SetSample(int index,
// set meta.
entry.SetMulti(new_shape);
// // set meta with hack
// Entry<shape_t> meta;
// meta.set_parent(entry.parent());
// meta.entry = entry.entry;
// meta.SetMulti(shape);
}
std::string ImageReader::caption() {
......@@ -154,9 +226,13 @@ ImageReader::ImageRecord ImageReader::record(int offset, int index) {
ImageRecord res;
auto record = reader_.record(offset);
auto entry = record.data(index);
auto data_str = entry.GetRaw();
std::transform(data_str.begin(),
data_str.end(),
auto data_hash = entry.GetRaw();
CHECK(!g_log_dir.empty())
<< "g_log_dir should be set in LogReader construction";
BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), data_hash);
std::transform(brcd.data.begin(),
brcd.data.end(),
std::back_inserter(res.data),
[](byte_t i) { return (int)(i); });
res.shape = entry.GetMulti<shape_t>();
......
......@@ -31,25 +31,9 @@ public:
storage_.AddMode(mode);
}
// Returns a copy of this writer whose mode is `mode`. The copy is taken
// first, then `mode` is added to this writer's storage_.
LogWriter AsMode(const std::string& mode) {
  LogWriter mode_writer(*this);
  storage_.AddMode(mode);
  mode_writer.mode_ = mode;
  return mode_writer;
}
LogWriter AsMode(const std::string& mode);
/**
* create a new tablet
*/
Tablet AddTablet(const std::string& tag) {
  // TODO(ChunweiYan) add string check here.
  // The storage key is the encoded form of "<mode_>/<tag>".
  std::string encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);

  auto tablet = storage_.AddTablet(encoded_tag);
  tablet.SetCaptions(std::vector<std::string>(1, mode_));
  tablet.SetTag(mode_, tag);
  return tablet;
}
Tablet AddTablet(const std::string& tag);
// Mutable access to the writer's storage_.
Storage& storage() {
  return storage_;
}
......@@ -64,61 +48,26 @@ private:
*/
class LogReader {
public:
// Constructs the underlying reader from `dir`.
LogReader(const std::string& dir)
    : reader_(dir) {}
LogReader(const std::string& dir);
// Switches this reader to `mode` in place.
void SetMode(const std::string& mode) {
  mode_ = mode;
}
// Returns a copy of this reader with its mode replaced by `mode`; this
// reader is left untouched.
LogReader AsMode(const std::string& mode) {
  LogReader mode_reader(*this);
  mode_reader.mode_ = mode;
  return mode_reader;
}
LogReader AsMode(const std::string& mode);
// The mode this reader is currently set to.
const std::string& mode() {
  return mode_;
}
// Looks up the tablet stored under the encoded form of "<mode_>/<tag>".
TabletReader tablet(const std::string& tag) {
  std::string encoded_tag = mode_ + "/" + tag;
  string::TagEncode(encoded_tag);
  return reader_.tablet(encoded_tag);
}
TabletReader tablet(const std::string& tag);
// Returns every tag known to the reader that matches the current mode_
// (see TagMatchMode). Tags are returned as stored, i.e. not decoded.
std::vector<std::string> all_tags() {
  auto tags = reader_.all_tags();
  // Erase-remove idiom: the whole tail starting at the iterator returned by
  // remove_if must be erased. Erasing only `it + 1` would keep stale entries
  // and is undefined behaviour when the iterator is at or next to end().
  const auto first_removed =
      std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
        return !TagMatchMode(tag, mode_);
      });
  tags.erase(first_removed, tags.end());
  return tags;
}
std::vector<std::string> all_tags();
// Returns the readable tags (mode prefix stripped, see GenReadableTag) of all
// records of the given component type that match the current mode_.
// CHECK-fails when the component has no tags at all.
std::vector<std::string> tags(const std::string& component) {
  auto tablet_type = Tablet::type(component);
  auto typed_tags = reader_.tags(tablet_type);
  CHECK(!typed_tags.empty()) << "component " << component
                             << " has no taged records";

  std::vector<std::string> readable;
  for (const auto& raw_tag : typed_tags) {
    if (!TagMatchMode(raw_tag, mode_)) continue;
    readable.push_back(GenReadableTag(mode_, raw_tag));
  }
  return readable;
}
std::vector<std::string> tags(const std::string& component);
// Mutable access to the underlying reader_.
StorageReader& storage() {
  return reader_;
}
// Decodes `tag` and drops its leading "<mode>/" prefix.
static std::string GenReadableTag(const std::string& mode,
                                  const std::string& tag) {
  std::string decoded(tag);
  string::TagDecode(decoded);
  const auto prefix_len = mode.size() + 1;  // the mode plus the `/`
  return decoded.substr(prefix_len);
}
const std::string& tag);
// True when `tag` is strictly longer than `mode` and starts with `mode`.
static bool TagMatchMode(const std::string& tag, const std::string& mode) {
  const bool tag_is_longer = tag.size() > mode.size();
  return tag_is_longer && tag.compare(0, mode.size(), mode) == 0;
}
static bool TagMatchMode(const std::string& tag, const std::string& mode);
protected:
private:
......
......@@ -137,11 +137,6 @@ def get_invididual_image(storage, mode, tag, step_index, max_size=80):
data = np.array(record.data(), dtype='uint8').reshape(shape)
tempfile = NamedTemporaryFile(mode='w+b', suffix='.png')
with Image.fromarray(data) as im:
size = max(shape[0], shape[1])
if size > max_size:
scale = max_size * 1. / size
scaled_shape = (int(shape[0] * scale), int(shape[1] * scale))
im = im.resize(scaled_shape)
im.save(tempfile)
tempfile.seek(0, 0)
return tempfile
......
#ifndef VISUALDL_STORAGE_BINARY_RECORD_H
#define VISUALDL_STORAGE_BINARY_RECORD_H
#include <fstream>
#include <functional>
#include <string>
#include <utility>
#include "visualdl/utils/filesystem.h"
namespace visualdl {
// Returns the "binary_records" sub-directory path of `dir`.
static std::string GenBinaryRecordDir(const std::string& dir) {
  std::string record_dir(dir);
  record_dir += "/binary_records";
  return record_dir;
}
// A storage helper to save large files (currently just for the Image
// component). The protobuf message has some limitation on message size, and
// LogWriter will maintain a memory of all the messages, so it is bad to store
// images directly in protobuf. A simple binary storage is used instead to
// serialize images to disk.
//
// File layout written by tofile(): the payload length as a raw `size_t`,
// followed by the payload bytes. The file is named after the decimal
// std::hash of the payload and placed under `dir`.
struct BinaryRecord {
  std::hash<std::string> hasher;

  // Takes ownership of `data` (it is moved from) and eagerly computes the
  // hash and the target path. Nothing is written until tofile() is called.
  BinaryRecord(const std::string& dir, std::string&& data)
      : dir_(dir), data_(std::move(data)) {
    // `data` has been moved into data_, so hash the stored payload.
    hash_ = std::to_string(hasher(data_));
    path_ = dir_ + "/" + hash_;
  }

  // Full path of the file tofile() writes to.
  const std::string& path() const { return path_; }

  // Creates the target directory if needed and writes the record to path().
  // CHECK-fails if the file cannot be opened or the write does not succeed.
  void tofile() {
    fs::TryRecurMkdir(dir_);
    std::fstream file(path_, std::ios::binary | std::ios::out);
    CHECK(file.is_open()) << "open " << path_ << " failed";
    const size_t size = data_.size();
    file.write(reinterpret_cast<const char*>(&size), sizeof(size));
    file.write(data_.data(), data_.size());
    // Flush so that a failed write is reported here rather than surfacing
    // later as a hash mismatch in the reader.
    file.flush();
    CHECK(file.good()) << "write " << path_ << " failed";
  }

  // Decimal string of std::hash over the payload; also the file name.
  const std::string& hash() const { return hash_; }

private:
  std::string dir_;
  std::string path_;
  std::string data_;
  std::string hash_;
};
// Loads a record from "<dir>/<hash>" at construction time. The file is
// expected to hold the payload length as a raw `size_t` followed by the
// payload bytes. After construction `data` holds the payload.
struct BinaryRecordReader {
  std::string data;
  std::hash<std::string> hasher;

  // Reads the record immediately; CHECK-fails if the file cannot be opened,
  // is truncated, or its content does not hash to `hash`.
  BinaryRecordReader(const std::string& dir, const std::string& hash)
      : dir_(dir), hash_(hash) {
    fromfile();
  }

  // Decimal string of std::hash over the currently loaded `data`.
  std::string hash() const { return std::to_string(hasher(data)); }

protected:
  void fromfile() {
    const std::string path = dir_ + "/" + hash_;
    std::ifstream file(path, std::ios::binary);
    CHECK(file.is_open()) << " failed to open file " << path;

    // Zero-initialised and verified: a short read must not leave `size`
    // holding garbage that is then used to resize `data`.
    size_t size = 0;
    file.read(reinterpret_cast<char*>(&size), sizeof(size));
    CHECK(file.good()) << "failed to read record size: " << path;

    data.resize(size);
    if (size > 0) {
      file.read(&data[0], size);
      CHECK(file.good()) << "data truncated: " << path;
    }
    CHECK_EQ(hash(), hash_) << "data broken: " << path;
  }

private:
  std::string dir_;
  std::string hash_;
};
} // namespace visualdl
#endif
#include "visualdl/storage/binary_record.h"
#include <gtest/gtest.h>
using namespace visualdl;
// Round-trips a payload through BinaryRecord / BinaryRecordReader in the
// current directory and checks that the bytes read back are unchanged.
TEST(BinaryRecord, init) {
  std::string payload("hello world");
  BinaryRecord record("./", std::move(payload));
  record.tofile();

  BinaryRecordReader loaded("./", record.hash());
  LOG(INFO) << loaded.data;
  ASSERT_EQ(loaded.data, "hello world");
}
......@@ -7,6 +7,8 @@
#include <unistd.h>
#include <fstream>
#include "visualdl/utils/logging.h"
namespace visualdl {
namespace fs {
......@@ -44,7 +46,6 @@ bool DeSerializeFromFile(T* proto, const std::string& path) {
}
static void TryMkdir(const std::string& dir) {
// VLOG(1) << "try to mkdir " << dir;
struct stat st = {0};
if (stat(dir.c_str(), &st) == -1) {
::mkdir(dir.c_str(), 0700);
......@@ -67,7 +68,6 @@ static void TryRecurMkdir(const std::string& path) {
inline void Write(const std::string& path,
const std::string& buffer,
std::ios::openmode open_mode = std::ios::binary) {
VLOG(1) << "write to path " << path;
std::ofstream file(path, open_mode);
CHECK(file.is_open()) << "failed to open " << path;
file.write(buffer.c_str(), buffer.size());
......@@ -76,7 +76,6 @@ inline void Write(const std::string& path,
inline std::string Read(const std::string& path,
std::ios::openmode open_mode = std::ios::binary) {
VLOG(1) << "read from path " << path;
std::string buffer;
std::ifstream file(path, open_mode | std::ios::ate);
CHECK(file.is_open()) << "failed to open " << path;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册