From 9735d1b81518c17a4506bbc7b7fd8810b20abb29 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 3 Aug 2022 19:40:24 +0800 Subject: [PATCH] [jit] c++ property deserialization & Variable support vector of int, float (#44727) * c++ property deserialization * fix for comment * more error info * fix exception info * fix ci * fix compile * fix layer test ci --- cmake/external/gtest.cmake | 13 +- paddle/fluid/framework/var_type_traits.h | 4 +- paddle/fluid/framework/variable_test.cc | 9 + paddle/fluid/jit/CMakeLists.txt | 23 +-- paddle/fluid/jit/layer.cc | 29 +++- paddle/fluid/jit/layer.h | 9 +- paddle/fluid/jit/layer_test.cc | 26 ++- paddle/fluid/jit/property.cc | 211 ++++++++++++++++++++++- paddle/fluid/jit/property.h | 31 +++- paddle/fluid/jit/serializer.cc | 25 ++- paddle/fluid/jit/serializer_utils.h | 1 + 11 files changed, 353 insertions(+), 28 deletions(-) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 3833a6ca86..f26410dab0 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -38,6 +38,9 @@ if(WIN32) set(GTEST_MAIN_LIBRARIES "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) + set(GMOCK_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgmock.lib" + CACHE FILEPATH "gmock libraries." FORCE) string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}") @@ -49,6 +52,9 @@ else() set(GTEST_MAIN_LIBRARIES "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) + set(GMOCK_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgmock.a" + CACHE FILEPATH "gmock libraries." FORCE) set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() @@ -86,7 +92,8 @@ ExternalProject_Add( -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} BUILD_BYPRODUCTS ${GTEST_LIBRARIES} - BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}) + BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES} + BUILD_BYPRODUCTS ${GMOCK_LIBRARIES}) add_library(gtest STATIC IMPORTED GLOBAL) set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) @@ -96,3 +103,7 @@ add_library(gtest_main STATIC IMPORTED GLOBAL) set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) add_dependencies(gtest_main extern_gtest) + +add_library(gmock STATIC IMPORTED GLOBAL) +set_property(TARGET gmock PROPERTY IMPORTED_LOCATION ${GMOCK_LIBRARIES}) +add_dependencies(gmock extern_gtest) diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index 0319916374..ea7ebce2da 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -213,7 +213,9 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl< std::vector>, int, float, - Vocab>; + Vocab, + std::vector, + std::vector>; template struct VarTypeTrait { static_assert(VarTypeRegistry::IsRegistered(), "Must be registered type"); diff --git a/paddle/fluid/framework/variable_test.cc b/paddle/fluid/framework/variable_test.cc index 9d49bfbba6..b998efc1c2 100644 --- a/paddle/fluid/framework/variable_test.cc +++ b/paddle/fluid/framework/variable_test.cc @@ -34,6 +34,15 @@ TEST(Variable, GetMutable) { return; } EXPECT_TRUE(false); + + std::unique_ptr v_ints(new Variable()); + auto* v_t = v_ints->GetMutable>(); + v_t->push_back(1); + v_t->push_back(2); + + const auto& cv_t = v_ints->Get>(); + EXPECT_EQ(cv_t[0], 1); + EXPECT_EQ(cv_t[1], 2); } } // namespace framework diff --git a/paddle/fluid/jit/CMakeLists.txt b/paddle/fluid/jit/CMakeLists.txt index cbbd1baf8f..38d2ae54de 100644 --- a/paddle/fluid/jit/CMakeLists.txt +++ b/paddle/fluid/jit/CMakeLists.txt @@ -1,7 +1,14 @@ +proto_library(paddle_jit_property_proto SRCS property.proto) + +cc_library( + jit_property + SRCS property.cc + DEPS paddle_jit_property_proto tensor) + cc_library( jit_serializer SRCS serializer.cc - DEPS lod_tensor device_context) + DEPS lod_tensor device_context jit_property) cc_library( jit_function_utils @@ -32,9 +39,10 @@ cc_library( if(WITH_TESTING AND NOT WIN32) add_custom_target( jit_download_program - COMMAND wget -nc -q --no-check-certificate - https://paddle-ci.gz.bcebos.com/dy2st/multi_program_load.tar.gz - COMMAND tar zxf multi_program_load.tar.gz) + COMMAND + wget -nc -q + https://paddle-ci.gz.bcebos.com/dy2st/multi_program_load_with_property.tar.gz + COMMAND tar zxf multi_program_load_with_property.tar.gz) set(JIT_DEPS phi phi_api @@ -52,10 +60,3 @@ if(WITH_TESTING AND NOT WIN32) DEPS ${JIT_DEPS}) add_dependencies(layer_test jit_download_program) endif() - -proto_library(paddle_jit_property_proto SRCS property.proto) - -cc_library( - jit_property - SRCS property.cc - DEPS paddle_jit_property_proto) diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc index 0e981bc459..a80b05e45c 100644 --- a/paddle/fluid/jit/layer.cc +++ b/paddle/fluid/jit/layer.cc @@ -19,11 +19,16 @@ #include "paddle/fluid/jit/base_function.h" #include "paddle/fluid/jit/compilation_unit.h" #include "paddle/fluid/jit/function_schema.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" namespace paddle { namespace jit { -Layer::Layer(const Name2VariableMap& params_dict, const phi::Place& place) - : params_dict_(params_dict) { + +Layer::Layer(const Name2VariableMap& params_dict, + const Name2VariableMap& attrs_dict, + const phi::Place& place) + : params_dict_(params_dict), attrs_dict_(attrs_dict) { unit_.reset(new CompilationUnit()); } @@ -57,5 +62,25 @@ const Name2FunctionMap& Layer::FunctionMap() const { return unit_->FunctionMap(); } +#define PD_SPECIALZE_ATTRIBUTE_TYPE(T) \ + template <> \ + T Layer::Attribute(const std::string& name) const { \ + if (attrs_dict_.find(name) == attrs_dict_.end()) { \ + PADDLE_THROW(phi::errors::NotFound( \ + "Attribute can not found %s, please check if it exists.")); \ + return T(); \ + } \ + auto var = attrs_dict_.at(name); \ + T ret = var->Get(); \ + return ret; \ + } + +PD_SPECIALZE_ATTRIBUTE_TYPE(int) +PD_SPECIALZE_ATTRIBUTE_TYPE(float) +PD_SPECIALZE_ATTRIBUTE_TYPE(std::string) +PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) +PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) +PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h index b2efa77fed..4c6c714d37 100644 --- a/paddle/fluid/jit/layer.h +++ b/paddle/fluid/jit/layer.h @@ -21,7 +21,7 @@ #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/place.h" -#include "base_function.h" +#include "base_function.h" //NOLINT namespace paddle { @@ -42,11 +42,14 @@ using Name2FunctionMap = class Layer { public: - Layer(const Name2VariableMap& params_dict, const phi::Place& place); + Layer(const Name2VariableMap& params_dict, + const Name2VariableMap& attrs_dict_, + const phi::Place& place); std::shared_ptr Function(const std::string& name) const; - Variable Attribute(const std::string& name) const; + template + T Attribute(const std::string& name) const; std::vector forward(const std::vector& inputs); diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc index 9ac99b50e4..1579610c7a 100644 --- a/paddle/fluid/jit/layer_test.cc +++ b/paddle/fluid/jit/layer_test.cc @@ -14,6 +14,7 @@ #include #include +#include #include "gtest/gtest.h" @@ -71,8 +72,31 @@ TEST(CpuLayerTest, Construct) { auto place = phi::CPUPlace(); std::string path = "./multi_program_load/export"; auto layer = jit::Load(path, place); - auto inputs = PrepareInputs(place); + float fbias = layer.Attribute("fbias"); + EXPECT_FLOAT_EQ(fbias, 1.4); + + int ds = layer.Attribute("down_sampling"); + EXPECT_EQ(ds, 4); + + std::string fstr = layer.Attribute("fstr"); + EXPECT_STREQ(fstr.c_str(), "save str property"); + + std::vector ints = layer.Attribute>("ints"); + EXPECT_EQ(ints[0], 10); + EXPECT_EQ(ints[1], 20); + + std::vector floats = layer.Attribute>("floats"); + EXPECT_FLOAT_EQ(floats[0], 1.1); + EXPECT_FLOAT_EQ(floats[1], 2.2); + + std::vector strs = + layer.Attribute>("strs"); + EXPECT_STREQ(strs[0].c_str(), "hello"); + EXPECT_STREQ(strs[1].c_str(), "world"); + + // functions + auto inputs = PrepareInputs(place); auto outs = layer.forward(inputs); auto out_data = outs[0].data(); EXPECT_NEAR(out_data[0], 0.02194316, 1e-6); diff --git a/paddle/fluid/jit/property.cc b/paddle/fluid/jit/property.cc index ea24757ff6..1cf303239b 100644 --- a/paddle/fluid/jit/property.cc +++ b/paddle/fluid/jit/property.cc @@ -12,16 +12,115 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/jit/property.h" +#include +#include +#include + #include "glog/logging.h" + +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/jit/property.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/errors.h" namespace paddle { namespace jit { +using Variable = paddle::framework::Variable; + +void Property::DeserializationFromString(const std::string &str) { + PADDLE_ENFORCE_EQ( + this->Proto()->ParsePartialFromString(str), + true, + phi::errors::InvalidArgument("Failed to parse pb from string")); + return; +} + +std::string Property::SerializationToString() { + std::string retv; + PADDLE_ENFORCE_EQ(this->Proto()->SerializePartialToString(&retv), + true, + phi::errors::InvalidArgument( + "Failed to serialize input Desc to string.")); + return retv; +} + +void Property::Deserialization(const std::string &path) { + std::ifstream ifs(path, std::ios::binary | std::ios::in); + std::string str((std::istreambuf_iterator(ifs)), + std::istreambuf_iterator()); + DeserializationFromString(str); + ifs.close(); + return; +} + +void Property::Serialization(const std::string &path) { + std::string str = SerializationToString(); + std::ofstream ofs(path, std::ios::binary | std::ios::out); + ofs << str; + ofs.close(); + return; +} + int Property::Size() const { return property_.entrys_size(); } +std::vector Property::Names() const { + std::vector res; + for (int i = 0; i < Size(); i++) { + auto entry = property_.entrys(i); + if (entry.has_name()) { + res.push_back(entry.name()); + } else { + LOG(WARNING) << "JIT::Property entry " << i + << " not has name! Please check whether it is reasonable."; + } + } + return res; +} + +std::unordered_map> Property::Values() { + std::unordered_map> res; + using ValueProto = proto::ValueProto; + for (int i = 0; i < Size(); i++) { + auto entry = property_.entrys(i); + if (entry.has_name()) { + auto &n = entry.name(); + // remove Class Name suffix + auto key = n.substr(n.find_first_of(".") + 1); + std::shared_ptr var(new Variable()); + auto type = entry.type(); + switch (type) { + case ValueProto::FLOAT: + *var->GetMutable() = GetFloat(n); + break; + case ValueProto::INT: + *var->GetMutable() = static_cast(GetInt64(n)); + break; + case ValueProto::STRING: + *var->GetMutable() = GetString(n); + break; + case ValueProto::FLOATS: + *var->GetMutable>() = GetFloats(n); + break; + case ValueProto::INTS: + *var->GetMutable>() = GetInt64s(n); + break; + case ValueProto::STRINGS: + *var->GetMutable>() = GetStrings(n); + break; + default: + break; + } + res[key] = var; + VLOG(3) << "read property: " << n << " to " << key; + } else { + LOG(WARNING) << "JIT::Property entry " << i + << " not has name! Please check whether it is reasonable."; + } + } + return res; +} + void Property::SetFloat(const float &f) { auto type = proto::ValueProto::FLOAT; auto entry = property_.add_entrys(); @@ -42,7 +141,16 @@ void Property::SetFloat(const std::string &name, const float &f) { float Property::GetFloat(const std::string &name) const { for (int i = 0; i < Size(); i++) { auto e = property_.entrys(i); + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE( + e.has_type() && e.type() == proto::ValueProto::FLOAT, + phi::errors::PreconditionNotMet("JIT::Property GetFloat: idx=%d type " + "is not float. Expect %d, but %d", + i, + proto::ValueProto::FLOAT, + e.type())); + return e.f(); } } @@ -91,6 +199,26 @@ void Property::SetFloats(const std::string &name, const std::vector &v) { << " for name: " << name; } +std::vector Property::GetFloats(const std::string &name) { + for (int i = 0; i < Size(); i++) { + auto e = property_.entrys(i); + + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE( + e.has_type() && e.type() == proto::ValueProto::FLOATS, + phi::errors::PreconditionNotMet( + "JIT::Property GetFloats: idx=%d type is not floats.", i)); + + auto items = e.floats(); + return std::vector(items.begin(), items.end()); + } + } + + PADDLE_THROW(phi::errors::NotFound( + "JIT::Property GetFloats: name: %s not found", name)); + return std::vector(); +} + void Property::SetInt64(const int64_t &i) { auto type = proto::ValueProto::INT; auto entry = property_.add_entrys(); @@ -108,6 +236,24 @@ void Property::SetInt64(const std::string &name, const int64_t &i) { VLOG(3) << "Property: set_int " << i << " name: " << name; } +int64_t Property::GetInt64(const std::string &name) { + for (int i = 0; i < Size(); i++) { + auto e = property_.entrys(i); + + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE(e.has_type() && e.type() == proto::ValueProto::INT, + phi::errors::PreconditionNotMet( + "JIT::Property GetInt64: idx=%d type is not int.", i)); + + return e.i(); + } + } + + PADDLE_THROW(phi::errors::NotFound( + "JIT::Property GetInt64: name: %s not found", name)); + return 0; +} + void Property::SetInt64s(const std::vector &v) { auto type = proto::ValueProto::INTS; auto entry = property_.add_entrys(); @@ -130,6 +276,31 @@ void Property::SetInt64s(const std::string &name, VLOG(3) << "Property: set_ints " << v[0] << " name: " << name; } +std::vector Property::GetInt64s(const std::string &name) { + for (int i = 0; i < Size(); i++) { + auto e = property_.entrys(i); + + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE( + e.has_type() && e.type() == proto::ValueProto::INTS, + phi::errors::PreconditionNotMet( + "JIT::Property GetInt64s: idx=%d type is not ints.", i)); + + auto items = e.ints(); + std::vector res; + std::transform(items.begin(), + items.end(), + std::back_inserter(res), + [](const int64_t &v) { return static_cast(v); }); + return res; + } + } + + PADDLE_THROW(phi::errors::NotFound( + "JIT::Property GetInt64s: name: %s not found", name)); + return {}; +} + void Property::SetString(const std::string &s) { auto type = proto::ValueProto::STRING; auto entry = property_.add_entrys(); @@ -147,6 +318,24 @@ void Property::SetString(const std::string &name, const std::string &s) { VLOG(3) << "Property: set_string " << s << " name: " << name; } +std::string Property::GetString(const std::string &name) { + for (int i = 0; i < Size(); i++) { + auto e = property_.entrys(i); + + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE( + e.has_type() && e.type() == proto::ValueProto::STRING, + phi::errors::PreconditionNotMet( + "JIT::Property GetString: idx=%d type is not string.", i)); + return e.s(); + } + } + + PADDLE_THROW(phi::errors::NotFound( + "JIT::Property GetString: name: %s not found", name)); + return {}; +} + void Property::SetStrings(const std::vector &v) { auto type = proto::ValueProto::STRINGS; auto entry = property_.add_entrys(); @@ -169,5 +358,25 @@ void Property::SetStrings(const std::string &name, VLOG(3) << "Property: set_strings " << v[0] << " name: " << name; } +std::vector Property::GetStrings(const std::string &name) { + for (int i = 0; i < Size(); i++) { + auto e = property_.entrys(i); + + if (e.has_name() && e.name() == name) { + PADDLE_ENFORCE( + e.has_type() && e.type() == proto::ValueProto::STRINGS, + phi::errors::PreconditionNotMet( + "JIT::Property GetStrings: idx=%d type is not strings.", i)); + + auto items = e.strings(); + return std::vector(items.begin(), items.end()); + } + } + + PADDLE_THROW(phi::errors::NotFound( + "JIT::Property GetStrings: name: %s not found", name)); + return {}; +} + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/property.h b/paddle/fluid/jit/property.h index 10b021ba5b..4a3fae89f6 100644 --- a/paddle/fluid/jit/property.h +++ b/paddle/fluid/jit/property.h @@ -17,13 +17,19 @@ #include #include #include +#include #include #include "paddle/fluid/jit/property.pb.h" namespace paddle { +namespace framework { +class Variable; +} namespace jit { +using Variable = paddle::framework::Variable; + class Property { public: Property() {} @@ -43,33 +49,54 @@ class Property { const proto::PropertyVals *Proto() const { return &property_; } int Size() const; + std::vector Names() const; + std::unordered_map> Values(); void SetFloat(const float &f); void SetFloat(const std::string &name, const float &f); + float GetFloat(const std::string &name) const; + float GetFloat(const int &idx) const; + void SetFloats(const std::vector &v); void SetFloats(const std::string &name, const std::vector &v); - float GetFloat(const std::string &name) const; - float GetFloat(const int &idx) const; + std::vector GetFloats(const std::string &name); void SetInt64(const int64_t &i); void SetInt64(const std::string &name, const int64_t &i); + int64_t GetInt64(const std::string &name); + void SetInt64s(const std::vector &v); void SetInt64s(const std::string &name, const std::vector &v); + std::vector GetInt64s(const std::string &name); + void SetString(const std::string &s); void SetString(const std::string &name, const std::string &s); + std::string GetString(const std::string &name); + void SetStrings(const std::vector &v); void SetStrings(const std::string &name, const std::vector &v); + std::vector GetStrings(const std::string &name); + + void Deserialization(const std::string &path); + + void Serialization(const std::string &path); + // The Id() and OriginalId() are only used for auto parallel. uint64_t Id() const { return id_; } uint64_t OriginalId() const { return original_id_; } void SetOriginalId(uint64_t original_id) { original_id_ = original_id; } + private: + void DeserializationFromString(const std::string &str); + + std::string SerializationToString(); + private: proto::PropertyVals property_; diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc index c24995f711..a9bd5676ad 100644 --- a/paddle/fluid/jit/serializer.cc +++ b/paddle/fluid/jit/serializer.cc @@ -37,7 +37,6 @@ Layer Deserializer::operator()(const std::string& path, // set is ordered std::set param_names_set; std::vector> infos; - Name2VariableMap params_dict; for (auto& it : pdmodel_paths) { auto& func_name = it.first; auto program_desc = LoadProgram(it.second); @@ -56,19 +55,27 @@ Layer Deserializer::operator()(const std::string& path, func_name, persist_var_names, program_desc)); } + Name2VariableMap params_dict; + Name2VariableMap attrs_dict; ReadTensorData(path + PDPARAMS_SUFFIX, param_names_set, place, ¶ms_dict); - // ReadAttributeData(); - Layer layer = Layer(params_dict, place); + if (utils::FileExists(path + PROPERTY_SUFFIX)) { + ReadAttributeData(path + PROPERTY_SUFFIX, &attrs_dict); + VLOG(3) << "Read Property Success!"; + } + + Layer layer = Layer(params_dict, attrs_dict, place); for (auto& info : infos) { if (FLAGS_jit_engine_type == "Executor") { - VLOG(3) << "Add function type: ExecutorFunction."; + VLOG(3) << "Add function type: ExecutorFunction. name: " + << info->FunctionName(); layer.SetFunction( info->FunctionName(), utils::MakeFunction(info, params_dict, place)); } else if (FLAGS_jit_engine_type == "PE") { - VLOG(3) << "Add function type: PEFunction."; + VLOG(3) << "Add function type: PEFunction. name: " + << info->FunctionName(); layer.SetFunction( info->FunctionName(), utils::MakeFunction(info, params_dict, place)); @@ -99,7 +106,13 @@ void Deserializer::ReadTensorData(const std::string& file_name, } void Deserializer::ReadAttributeData(const std::string& file_path, - Name2VariableMap* attrs_dict) const {} + Name2VariableMap* attrs_dict) const { + VLOG(3) << "ReadPropertyData from: " << file_path; + Property p; + p.Deserialization(file_path); + *attrs_dict = static_cast(p.Values()); + return; +} framework::ProgramDesc Deserializer::LoadProgram(const std::string& file_name) { VLOG(3) << "LoadProgram from: " << file_name; diff --git a/paddle/fluid/jit/serializer_utils.h b/paddle/fluid/jit/serializer_utils.h index 97850504d9..0afe6be3fe 100644 --- a/paddle/fluid/jit/serializer_utils.h +++ b/paddle/fluid/jit/serializer_utils.h @@ -26,6 +26,7 @@ class VarDesc; namespace jit { static const char PDMODEL_SUFFIX[] = ".pdmodel"; static const char PDPARAMS_SUFFIX[] = ".pdiparams"; +static const char PROPERTY_SUFFIX[] = ".meta"; namespace utils { bool IsPersistable(framework::VarDesc* desc_ptr); -- GitLab