From ef48ef34aaa012eae9481ab0dbe8e0796b43d41b Mon Sep 17 00:00:00 2001 From: Thuan Nguyen Date: Tue, 6 Feb 2018 01:51:22 -0800 Subject: [PATCH] Binary Record Fixe (#260) * Binary Record Fixes * Fixed issues with binary_record.h filename generator method. std::hash is not consistent across OS. It will generate different hash's when ran on different OS on the same string. Change the filename generator to use a static counter. It will be more ideal to use UUID generator available in boost 1.4.2, however, boost 1.4.2 has problems compiling on c++11 \n* Fix issue with pytorch demo where it logs to the absolute path /workspace * Fix performance issue on server when server fails to load image file. Currently server retries an API call 3 times before it fails, at which point it'll sleep the thread for 2 secs. This becomes problematic when there are alot of SDK errors. I'll create seperate issue to refine the retry logic to not be so expensive on the server. --- CMakeLists.txt | 2 +- demo/pytorch/pytorch_cifar10.py | 2 +- requirements.txt | 2 + visualdl/logic/CMakeLists.txt | 6 +-- visualdl/logic/sdk.cc | 6 +-- visualdl/storage/CMakeLists.txt | 3 +- visualdl/storage/binary_record.cc | 21 ++++++++++ visualdl/storage/binary_record.h | 57 ++++++++++++++++---------- visualdl/storage/test_binary_record.cc | 2 +- 9 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 visualdl/storage/binary_record.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c6173cc..7f094269 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ add_executable(vl_test ${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h ${PROJECT_SOURCE_DIR}/visualdl/utils/filesystem.h ) -target_link_libraries(vl_test sdk storage entry tablet im gtest glog protobuf gflags pthread eigen3) +target_link_libraries(vl_test sdk storage binary_record entry tablet im gtest glog protobuf gflags pthread eigen3) enable_testing () add_test(NAME vstest COMMAND ./vl_test) endif(NOT ON_RELEASE) diff --git a/demo/pytorch/pytorch_cifar10.py b/demo/pytorch/pytorch_cifar10.py index e68f1aed..4915a932 100644 --- a/demo/pytorch/pytorch_cifar10.py +++ b/demo/pytorch/pytorch_cifar10.py @@ -43,7 +43,7 @@ def imshow(img): fig.savefig('out' + str(np.random.randint(0, 10000)) + '.pdf') -logdir = "/workspace" +logdir = "./workspace" logger = LogWriter(logdir, sync_cycle=100) # mark the components with 'train' label. diff --git a/requirements.txt b/requirements.txt index 927faa90..ba8ef6a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ sphinx-rtd-theme==0.2.4 flake8==3.5.0 Pillow==5.0.0 pre-commit==1.5.1 +flask==0.12.2 +protobuf==3.5.1 diff --git a/visualdl/logic/CMakeLists.txt b/visualdl/logic/CMakeLists.txt index bcabd3b5..afb35315 100644 --- a/visualdl/logic/CMakeLists.txt +++ b/visualdl/logic/CMakeLists.txt @@ -15,7 +15,7 @@ add_library(im ${PROJECT_SOURCE_DIR}/visualdl/logic/im.cc) add_library(sdk ${PROJECT_SOURCE_DIR}/visualdl/logic/sdk.cc ${PROJECT_SOURCE_DIR}/visualdl/utils/image.h) add_dependencies(im storage_proto) -add_dependencies(sdk entry storage storage_proto eigen3) +add_dependencies(sdk entry binary_record storage storage_proto eigen3) ## pybind set(OPTIONAL_LINK_FLAGS) @@ -28,10 +28,10 @@ add_library(core SHARED ${PROJECT_SOURCE_DIR}/visualdl/logic/pybind.cc) if (NOT ON_RELEASE) add_dependencies(core pybind python im entry tablet storage sdk protobuf glog eigen3) - target_link_libraries(core PRIVATE pybind entry python im tablet storage sdk protobuf glog ${OPTIONAL_LINK_FLAGS}) + target_link_libraries(core PRIVATE pybind entry binary_record python im tablet storage sdk protobuf glog ${OPTIONAL_LINK_FLAGS}) else() add_dependencies(core pybind python im entry tablet storage sdk protobuf eigen3) - target_link_libraries(core PRIVATE pybind entry python im tablet storage sdk protobuf ${OPTIONAL_LINK_FLAGS}) + target_link_libraries(core PRIVATE pybind entry binary_record python im tablet storage sdk protobuf ${OPTIONAL_LINK_FLAGS}) endif() set_target_properties(core PROPERTIES PREFIX "" SUFFIX ".so") diff --git a/visualdl/logic/sdk.cc b/visualdl/logic/sdk.cc index 94dce5c2..3edc8c27 100644 --- a/visualdl/logic/sdk.cc +++ b/visualdl/logic/sdk.cc @@ -244,7 +244,7 @@ void Image::SetSample(int index, CHECK_EQ(std::remove(old_path.c_str()), 0) << "delete old binary record " << old_path << " failed"; } - entry.SetRaw(brcd.hash()); + entry.SetRaw(brcd.filename()); static_assert( !is_same_type::value, @@ -268,10 +268,10 @@ ImageReader::ImageRecord ImageReader::record(int offset, int index) { ImageRecord res; auto record = reader_.record(offset); auto entry = record.data(index); - auto data_hash = entry.GetRaw(); + auto filename = entry.GetRaw(); CHECK(!g_log_dir.empty()) << "g_log_dir should be set in LogReader construction"; - BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), data_hash); + BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), filename); std::transform(brcd.data.begin(), brcd.data.end(), diff --git a/visualdl/storage/CMakeLists.txt b/visualdl/storage/CMakeLists.txt index 51a0b415..0a788ae7 100644 --- a/visualdl/storage/CMakeLists.txt +++ b/visualdl/storage/CMakeLists.txt @@ -7,9 +7,10 @@ add_dependencies(storage_proto protobuf) add_library(entry entry.cc entry.h ${PROTO_SRCS} ${PROTO_HDRS}) add_library(tablet tablet.cc tablet.h ${PROTO_SRCS} ${PROTO_HDRS}) add_library(record record.cc record.h ${PROTO_SRCS} ${PROTO_HDRS}) +add_library(binary_record binary_record.cc binary_record.h ${PROTO_SRCS} ${PROTO_HDRS}) add_library(storage storage.cc storage.h ${PROTO_SRCS} ${PROTO_HDRS}) add_dependencies(entry storage_proto im) add_dependencies(record storage_proto entry) add_dependencies(tablet storage_proto) -add_dependencies(storage storage_proto record tablet entry) +add_dependencies(storage storage_proto record binary_record tablet entry) diff --git a/visualdl/storage/binary_record.cc b/visualdl/storage/binary_record.cc new file mode 100644 index 00000000..29e3bdd6 --- /dev/null +++ b/visualdl/storage/binary_record.cc @@ -0,0 +1,21 @@ +/* Copyright (c) 2017 VisualDL Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "visualdl/storage/binary_record.h" + +namespace visualdl { + +int BinaryRecord::counter_ = 0; + +} // namespace visualdl diff --git a/visualdl/storage/binary_record.h b/visualdl/storage/binary_record.h index 529bd95e..8b8c42c6 100644 --- a/visualdl/storage/binary_record.h +++ b/visualdl/storage/binary_record.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include #include "visualdl/utils/filesystem.h" @@ -32,12 +33,10 @@ static std::string GenBinaryRecordDir(const std::string& dir) { // directly in protobuf. So a simple binary storage is used to serialize images // to disk. struct BinaryRecord { - std::hash hasher; - BinaryRecord(const std::string dir, std::string&& data) : data_(data), dir_(dir) { - hash_ = std::to_string(hasher(data)); - path_ = dir + "/" + hash(); + filename_ = GenerateFileName(); + path_ = dir + "/" + filename_; } const std::string& path() { return path_; } @@ -52,42 +51,58 @@ struct BinaryRecord { file.write(data_.data(), data_.size()); } - const std::string& hash() { return hash_; } + static std::string GenerateFileName() { + std::stringstream ss; + ss << counter_++; + return ss.str(); + } + + const std::string& filename() { return filename_; } private: std::string dir_; std::string path_; std::string data_; - std::string hash_; + std::string filename_; + static int counter_; }; struct BinaryRecordReader { std::string data; - std::hash hasher; - BinaryRecordReader(const std::string& dir, const std::string& hash) - : dir_(dir), hash_(hash) { + BinaryRecordReader(const std::string& dir, const std::string& filename) + : dir_(dir), filename_(filename) { fromfile(); } - std::string hash() { return std::to_string(hasher(data)); } + + const std::string& filename() { return filename_; } protected: void fromfile() { - std::string path = dir_ + "/" + hash_; - std::ifstream file(path, file.binary); - CHECK(file.is_open()) << " failed to open file " << path; - - size_t size; - file.read(reinterpret_cast(&size), sizeof(size_t)); - data.resize(size); - file.read(&data[0], size); - - CHECK_EQ(hash(), hash_) << "data broken: " << path; + // filename_ can be empty if a user stops training abruptly. In that case, + // we shouldn't load the file. The server will hang if we try to load the + // empty file. For now we don't throw an assert since it causes performance + // problems on the server. + // TODO(thuan): Update SDK.cc to not add image record if user stops training + // abruptly and no filename_ is set. + // TODO(thuan): Optimize visualDL server retry logic when a request fails. + // Currently if SDK call fails, server will issue multiple retries, + // which impacts performance. + if (!filename_.empty()) { + std::string path = dir_ + "/" + filename_; + std::ifstream file(path, file.binary); + CHECK(file.is_open()) << " failed to open file " << path; + + size_t size; + file.read(reinterpret_cast(&size), sizeof(size_t)); + data.resize(size); + file.read(&data[0], size); + } } private: std::string dir_; - std::string hash_; + std::string filename_; }; } // namespace visualdl diff --git a/visualdl/storage/test_binary_record.cc b/visualdl/storage/test_binary_record.cc index c51dcbee..52ea6cbb 100644 --- a/visualdl/storage/test_binary_record.cc +++ b/visualdl/storage/test_binary_record.cc @@ -23,7 +23,7 @@ TEST(BinaryRecord, init) { BinaryRecord rcd("./", std::move(message)); rcd.tofile(); - BinaryRecordReader reader("./", rcd.hash()); + BinaryRecordReader reader("./", rcd.filename()); LOG(INFO) << reader.data; ASSERT_EQ(reader.data, "hello world"); } -- GitLab