未验证 提交 0aa344f0 编写于 作者: H Hui Zhang 提交者: GitHub

[jit] jit support property.proto (#44337)

* add property.proto; it can be compiled

* property get and deserialize

* support get float

* format code

* format code

* add unittest

* add more set method

* fix grammar error

* Update paddle/fluid/jit/property.h
Co-authored-by: NAurelius84 <zhangliujie@baidu.com>

* Update paddle/fluid/jit/property.cc
Co-authored-by: NAurelius84 <zhangliujie@baidu.com>

* Update paddle/fluid/jit/property.cc
Co-authored-by: NAurelius84 <zhangliujie@baidu.com>

* Update paddle/fluid/jit/property.cc
Co-authored-by: NAurelius84 <zhangliujie@baidu.com>

* fix comment

* fix error throw

* fix property save unit test

* fix error info

* fix copyright and header import

* reorder jit property tensor datatype
Co-authored-by: NAurelius84 <zhangliujie@baidu.com>
上级 7ab0e336
......@@ -52,3 +52,10 @@ if(WITH_TESTING AND NOT WIN32)
DEPS ${JIT_DEPS})
add_dependencies(layer_test jit_download_program)
endif()
# Proto target built from property.proto (the JIT property schema).
proto_library(paddle_jit_property_proto SRCS property.proto)
# C++ library wrapping the property proto (property.cc); it depends on the
# proto target declared above.
cc_library(
jit_property
SRCS property.cc
DEPS paddle_jit_property_proto)
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/jit/property.h"
#include "glog/logging.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
namespace paddle {
namespace jit {
// Returns the number of entries currently stored in the property list.
int Property::Size() const {
  const int entry_count = property_.entrys_size();
  return entry_count;
}
// Appends a new, unnamed entry of type FLOAT holding `f`.
void Property::SetFloat(const float &f) {
  auto *item = property_.add_entrys();
  item->set_f(f);
  item->set_type(proto::ValueProto::FLOAT);
  VLOG(3) << "Property: set_float " << f;
}
// Appends a new entry of type FLOAT named `name` and holding `f`.
// An existing entry with the same name is not replaced; a new one is added.
void Property::SetFloat(const std::string &name, const float &f) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::FLOAT);
  item->set_name(name);
  item->set_f(f);
  VLOG(3) << "Property: set_float " << f << " name: " << name;
}
float Property::GetFloat(const std::string &name) const {
for (int i = 0; i < Size(); i++) {
auto e = property_.entrys(i);
if (e.has_name() && e.name() == name) {
return e.f();
}
}
PADDLE_THROW(phi::errors::NotFound(
"JIT::Property GetFloat: name: %s not found", name));
return 0;
}
// Returns the float stored in the entry at position `idx`.
//
// Throws OutOfRange when `idx` is not in [0, Size()), and InvalidArgument
// when the entry at `idx` does not hold a float.
float Property::GetFloat(const int &idx) const {
  PADDLE_ENFORCE_EQ(
      idx < Size() && idx >= 0,
      true,
      phi::errors::OutOfRange(
          "JIT::Property GetFloat: idx=%d out of range %d", idx, Size()));

  // Bind by reference to avoid deep-copying the whole ValueProto message.
  const auto &entry = property_.entrys(idx);
  if (entry.has_f()) {
    return entry.f();
  }

  PADDLE_THROW(phi::errors::InvalidArgument(
      "JIT::Property GetFloat: input idx (%d) element is not a float.", idx));
  return 0;
}
// Appends a new, unnamed entry of type FLOATS containing every element of
// `v`, in order.
void Property::SetFloats(const std::vector<float> &v) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::FLOATS);
  for (const float value : v) {
    item->add_floats(value);
  }
  VLOG(3) << "Property: set_floats with length: " << v.size();
}
// Appends a new entry of type FLOATS named `name` containing every element
// of `v`, in order.
void Property::SetFloats(const std::string &name, const std::vector<float> &v) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::FLOATS);
  item->set_name(name);
  for (const float value : v) {
    item->add_floats(value);
  }
  VLOG(3) << "Property: set_floats with length " << v.size()
          << " for name: " << name;
}
// Appends a new, unnamed entry of type INT holding `i`.
void Property::SetInt64(const int64_t &i) {
  auto *item = property_.add_entrys();
  item->set_i(i);
  item->set_type(proto::ValueProto::INT);
  VLOG(3) << "Property: set_int " << i;
}
// Appends a new entry of type INT named `name` and holding `i`.
void Property::SetInt64(const std::string &name, const int64_t &i) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::INT);
  item->set_name(name);
  item->set_i(i);
  VLOG(3) << "Property: set_int " << i << " name: " << name;
}
// Appends a new, unnamed entry of type INTS containing every element of
// `v`, in order.
void Property::SetInt64s(const std::vector<int64_t> &v) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::INTS);
  for (const int64_t value : v) {
    item->add_ints(value);
  }
  VLOG(3) << "Property: set_ints " << v.size();
}
// Appends a new entry of type INTS named `name` containing every element of
// `v`, in order. `v` may be empty.
void Property::SetInt64s(const std::string &name,
                         const std::vector<int64_t> &v) {
  auto type = proto::ValueProto::INTS;
  auto entry = property_.add_entrys();
  entry->set_name(name);
  entry->set_type(type);
  for (auto i : v) {
    entry->add_ints(i);
  }
  // Log the length rather than v[0]: indexing an empty vector is undefined
  // behaviour once VLOG(3) is enabled.
  VLOG(3) << "Property: set_ints with length " << v.size()
          << " for name: " << name;
}
// Appends a new, unnamed entry of type STRING holding `s`.
void Property::SetString(const std::string &s) {
  auto *item = property_.add_entrys();
  item->set_s(s);
  item->set_type(proto::ValueProto::STRING);
  VLOG(3) << "Property: set_string with value : " << s;
}
// Appends a new entry of type STRING named `name` and holding `s`.
void Property::SetString(const std::string &name, const std::string &s) {
  auto *item = property_.add_entrys();
  item->set_type(proto::ValueProto::STRING);
  item->set_name(name);
  item->set_s(s);
  VLOG(3) << "Property: set_string " << s << " name: " << name;
}
// Appends a new, unnamed entry of type STRINGS containing every element of
// `v`, in order.
void Property::SetStrings(const std::vector<std::string> &v) {
  auto type = proto::ValueProto::STRINGS;
  auto entry = property_.add_entrys();
  entry->set_type(type);
  // Iterate by const reference so each string is not copied before being
  // handed to the proto.
  for (const auto &str : v) {
    entry->add_strings(str);
  }
  VLOG(3) << "Property: set_strings " << v.size();
}
// Appends a new entry of type STRINGS named `name` containing every element
// of `v`, in order. `v` may be empty.
void Property::SetStrings(const std::string &name,
                          const std::vector<std::string> &v) {
  auto type = proto::ValueProto::STRINGS;
  auto entry = property_.add_entrys();
  entry->set_name(name);
  entry->set_type(type);
  // Iterate by const reference so each string is not copied before being
  // handed to the proto.
  for (const auto &str : v) {
    entry->add_strings(str);
  }
  // Log the length rather than v[0]: indexing an empty vector is undefined
  // behaviour once VLOG(3) is enabled.
  VLOG(3) << "Property: set_strings with length " << v.size()
          << " for name: " << name;
}
} // namespace jit
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <atomic>
#include <string>
#include <vector>
#include "paddle/fluid/jit/property.pb.h"
namespace paddle {
namespace jit {
// Wrapper around a proto::PropertyVals message: an append-only list of
// optionally named, typed values (float, int64, string and lists of those).
//
// Every Set* call appends a new entry; existing entries are never replaced.
class Property {
 public:
  Property() {}

  // Hand-written copy constructor (for auto parallel): copies the proto and
  // original_id_ from `other`, while id_ is freshly generated for the new
  // object. Deliberately not `explicit`: an explicit copy constructor
  // forbids copy-initialization, returning by value and passing by value.
  Property(const Property &other)
      : property_(other.property_), original_id_(other.original_id_) {}

  // Copies the proto and original_id_ from `other`; this object's id_ is
  // left unchanged.
  Property &operator=(const Property &other) {
    property_ = other.property_;
    original_id_ = other.original_id_;
    return *this;
  }

  // Direct access to the underlying proto message (used for
  // serialization/deserialization).
  proto::PropertyVals *Proto() { return &property_; }

  const proto::PropertyVals *Proto() const { return &property_; }

  // Number of entries stored so far.
  int Size() const;

  // Float setters/getters. The overloads taking `name` attach a name to the
  // new entry; the others append an unnamed entry.
  void SetFloat(const float &f);
  void SetFloat(const std::string &name, const float &f);

  void SetFloats(const std::vector<float> &v);
  void SetFloats(const std::string &name, const std::vector<float> &v);

  // Look up a float either by entry name or by entry position.
  float GetFloat(const std::string &name) const;
  float GetFloat(const int &idx) const;

  void SetInt64(const int64_t &i);
  void SetInt64(const std::string &name, const int64_t &i);

  void SetInt64s(const std::vector<int64_t> &v);
  void SetInt64s(const std::string &name, const std::vector<int64_t> &v);

  void SetString(const std::string &s);
  void SetString(const std::string &name, const std::string &s);

  void SetStrings(const std::vector<std::string> &v);
  void SetStrings(const std::string &name, const std::vector<std::string> &v);

  // The Id() and OriginalId() are only used for auto parallel.
  uint64_t Id() const { return id_; }
  uint64_t OriginalId() const { return original_id_; }
  void SetOriginalId(uint64_t original_id) { original_id_ = original_id; }

 private:
  proto::PropertyVals property_;

  // This thread-safe implementation seems to be redundant since the neural
  // networks are usually constructed in a single thread.
  static uint64_t GenerateId() {
    static std::atomic<std::uint64_t> uid{0};
    return ++uid;
  }

  // Note: the id_ is unique for all Property (only for auto parallel).
  uint64_t id_ = GenerateId();
  // Note: the original_id_ is used for referring to the original Property
  // that the current Property is built from (only for auto parallel).
  // The default original_id_ is same as the id_, which means the
  // current Property is not built from the other one.
  // Must stay declared after id_, since its default initializer reads id_.
  uint64_t original_id_ = id_;
};
} // namespace jit
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";
package paddle.jit.proto;
// Serialized tensor value: shape (dims), element type (data_type) and the
// element data, stored either in one of the typed repeated fields or in
// raw_data.
message TensorProto {
// Element types a tensor can hold.
enum DataType {
UNDEFINED = 0;
BOOL = 1; // bool
UINT8 = 2; // uint8_t
INT8 = 3; // int8_t
UINT16 = 4; // uint16_t
INT16 = 5; // int16_t
UINT32 = 6; // uint32_t
INT32 = 7; // int32_t
UINT64 = 8; // uint64_t
INT64 = 9; // int64_t
FLOAT = 10; // float
DOUBLE = 11;
COMPLEX64 = 12; // complex with float32 real and imaginary components
COMPLEX128 = 13; // complex with float64 real and imaginary components
STRING = 14; // string
// IEEE754 half-precision floating-point format (16 bits wide).
// This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
FLOAT16 = 15;
// Non-IEEE floating-point format based on IEEE754 single-precision
// floating-point number truncated to 16 bits.
// This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
BFLOAT16 = 16;
// Future extensions go here.
}
optional bool stop_gradient = 1;
// The shape of the tensor.
repeated int64 dims = 2;
// The data type of the tensor.
// NOTE(review): declared as int32 rather than DataType; presumably it holds
// a DataType enum value -- confirm against the code that writes it.
optional int32 data_type = 3;
// Tensor content must be organized in row-major order.
// For float and complex64 values
// Complex64 tensors are encoded as a single array of floats,
// with the real components appearing in odd numbered positions,
// and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
repeated float float_data = 4 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16 values
// float16 values must be bit-wise converted to an uint16_t prior
// to writing to the buffer.
// When this field is present, the data_type field MUST be
// INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16
repeated int32 int32_data = 5 [packed = true];
// For strings.
// Each element of string_data is a UTF-8 encoded Unicode
// string. No trailing null, no leading BOM. The protobuf "string"
// scalar type is not used to match ML community conventions.
// When this field is present, the data_type field MUST be STRING
repeated bytes string_data = 6;
// For int64.
// When this field is present, the data_type field MUST be INT64
repeated int64 int64_data = 7 [packed = true];
// For double
// Complex128 tensors are encoded as a single array of doubles,
// with the real components appearing in odd numbered positions,
// and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
repeated double double_data = 8 [packed = true];
// For uint64 and uint32 values
// When this field is present, the data_type field MUST be
// UINT32 or UINT64
repeated uint64 uint64_data = 9 [packed = true];
// Serializations can either use one of the fields above, or use this
// raw bytes field. The only exception is the string case, where one is
// required to store the content in the repeated bytes string_data field.
//
// When this raw_data field is used to store tensor value, elements MUST
// be stored in as fixed-width, little-endian order.
// Floating-point data types MUST be stored in IEEE 754 format.
// Complex64 elements must be written as two consecutive FLOAT values, real component first.
// Complex128 elements must be written as two consecutive DOUBLE values, real component first.
// Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false).
//
// Note: the advantage of specific field rather than the raw_data field is
// that in some cases (e.g. int data), protobuf does a better packing via
// variable length storage, and may lead to smaller binary footprint.
// When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
optional bytes raw_data = 10;
}
// One property entry: an optional name, a type tag, and the value itself in
// the field that corresponds to that tag.
message ValueProto {
// Tag identifying which value field of the entry is in use.
// Note that the numbering skips 5; these enum values are independent of the
// field numbers below.
enum AttributeType {
UNDEFINED = 0;
FLOAT = 1;
INT = 2;
STRING = 3;
TENSOR = 4;
FLOATS = 6;
INTS = 7;
STRINGS = 8;
TENSORS = 9;
}
// Optional lookup key; entries may be stored without a name.
optional string name = 1;
optional AttributeType type = 2; // discriminator that indicates which field below is in use
// Exactly ONE of the following fields must be present
optional float f = 3; // float
optional int64 i = 4; // int
optional bytes s = 5; // UTF-8 string
optional TensorProto t = 6; // tensor value
repeated float floats = 7; // list of floats
repeated int64 ints = 8; // list of ints
repeated bytes strings = 9; // list of UTF-8 strings
repeated TensorProto tensors = 10; // list of tensors
}
// Top-level message: the ordered list of property entries.
// The field name `entrys` determines the generated C++ accessor names
// (entrys_size(), add_entrys(), entrys(i)), so it cannot be renamed without
// updating every caller.
message PropertyVals {
repeated ValueProto entrys=1;
}
......@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/jit/property.h"
#include "paddle/fluid/jit/layer.h"
......@@ -47,10 +48,12 @@ class Deserializer {
const phi::Place& place,
Name2VariableMap* params_dict) const;
// property pb
void ReadAttributeData(const std::string& file_path,
Name2VariableMap* attrs_dict) const;
// void ReadExtraInfo(const std::string& file_name) const;
// void ReadByteCode(const std::string& file_name) const;
framework::ProgramDesc LoadProgram(const std::string& file_name);
......
......@@ -39,7 +39,8 @@ set(PYBIND_DEPS
phi_utils
tcp_store
new_profiler
jit_layer)
jit_layer
jit_property)
if(WITH_PSCORE)
set(PYBIND_DEPS ${PYBIND_DEPS} ps_service)
......
......@@ -25,8 +25,11 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/jit/property.h"
#include "paddle/fluid/pybind/pybind_boost_headers.h"
namespace py = pybind11;
namespace paddle {
namespace pybind {
......@@ -34,6 +37,7 @@ PyTypeObject *g_vartype_pytype = nullptr;
PyTypeObject *g_blockdesc_pytype = nullptr;
namespace pd = paddle::framework;
namespace jit = paddle::jit;
template <typename T>
static pybind11::bytes SerializeMessage(
......@@ -47,6 +51,15 @@ static pybind11::bytes SerializeMessage(
return retv;
}
template <typename T>
static void DeserializeMessage(T *self, const std::string &str) {
PADDLE_ENFORCE_EQ(
self->Proto()->ParsePartialFromString(str),
true,
platform::errors::InvalidArgument("Failed to parse pb from string"));
return;
}
// Bind Methods
void BindProgramDesc(pybind11::module *m) {
pybind11::class_<pd::ProgramDesc>(*m, "ProgramDesc", "")
......@@ -342,5 +355,96 @@ void BindOpDesc(pybind11::module *m) {
.def("outputs", &pd::OpDesc::Outputs);
}
// Serialize Class Property
// Registers the Python class `Property` (backed by jit::Property) on module
// `m`: size(), the set_*/get_float overloads, the not-yet-implemented
// set_tensor/set_tensors, and string (de)serialization of the proto.
void BindJitProperty(pybind11::module *m) {
  pybind11::class_<jit::Property> property(*m, "Property");
  property
      .def(
          "__init__",
          // Default-constructs the Property in the storage passed in as self.
          [](jit::Property &self) { new (&self) jit::Property(); },
          pybind11::return_value_policy::reference)
      .def("size", &jit::Property::Size)
      .def("set_float",
           py::overload_cast<const float &>(&jit::Property::SetFloat),
           "set float",
           py::arg("val"))
      .def("set_float",
           py::overload_cast<const std::string &, const float &>(
               &jit::Property::SetFloat),
           "set float",
           py::arg("name"),
           py::arg("var"))
      .def("get_float",
           py::overload_cast<const int &>(&jit::Property::GetFloat, py::const_))
      .def("get_float",
           py::overload_cast<const std::string &>(&jit::Property::GetFloat,
                                                  py::const_))
      .def("set_floats",
           py::overload_cast<const std::vector<float> &>(
               &jit::Property::SetFloats),
           "set list of float",
           py::arg("vals"))
      .def("set_floats",
           py::overload_cast<const std::string &, const std::vector<float> &>(
               &jit::Property::SetFloats),
           "set list of float",
           py::arg("name"),
           py::arg("val"))
      .def("set_int",
           py::overload_cast<const int64_t &>(&jit::Property::SetInt64),
           "set int",
           py::arg("val"))
      .def("set_int",
           py::overload_cast<const std::string &, const int64_t &>(
               &jit::Property::SetInt64),
           "set int",
           py::arg("name"),
           py::arg("val"))
      .def("set_ints",
           py::overload_cast<const std::vector<int64_t> &>(
               &jit::Property::SetInt64s),
           "set list of int",
           py::arg("vals"))
      .def("set_ints",
           py::overload_cast<const std::string &, const std::vector<int64_t> &>(
               &jit::Property::SetInt64s),
           "set list of int",
           py::arg("name"),
           py::arg("val"))
      .def("set_string",
           py::overload_cast<const std::string &>(&jit::Property::SetString),
           "set string",
           py::arg("val"))
      .def("set_string",
           py::overload_cast<const std::string &, const std::string &>(
               &jit::Property::SetString),
           "set string",
           py::arg("name"),
           py::arg("val"))
      .def("set_strings",
           py::overload_cast<const std::vector<std::string> &>(
               &jit::Property::SetStrings),
           "set list of string",
           py::arg("vals"))
      .def("set_strings",
           py::overload_cast<const std::string &,
                             const std::vector<std::string> &>(
               &jit::Property::SetStrings),
           "set list of string",
           py::arg("name"),
           py::arg("val"))
      // Methods bound with .def receive the instance as their first
      // argument, so the lambdas must take `self`; without it the call
      // `prop.set_tensor(tensor, name)` can never match the signature.
      // PADDLE_THROW is used so the error goes through Paddle's regular
      // exception path instead of throwing a bare error-summary object.
      .def("set_tensor",
           [](jit::Property &self,
              const pd::VarDesc &tensor,
              const std::string &name) {
             PADDLE_THROW(
                 platform::errors::Unimplemented("Not implement set_tensor."));
           })
      .def("set_tensors",
           [](jit::Property &self,
              const pybind11::list &tensors,
              const std::string &name) {
             PADDLE_THROW(platform::errors::Unimplemented(
                 "Not implement set_tensors."));
           })
      .def("serialize_to_string", SerializeMessage<jit::Property>)
      .def("parse_from_string", DeserializeMessage<jit::Property>);
}
} // namespace pybind
} // namespace paddle
......@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if defined(_MSC_VER)
......@@ -34,6 +35,7 @@ void BindBlockDesc(pybind11::module* m);
void BindVarDsec(pybind11::module* m);
void BindOpDesc(pybind11::module* m);
void BindProcessMeshDesc(pybind11::module* m);
void BindJitProperty(pybind11::module* m);
} // namespace pybind
} // namespace paddle
......@@ -1703,6 +1703,7 @@ All parameter, weight, gradient are variables in Paddle.
BindProcessMeshDesc(&m);
BindFleetExecutor(&m);
BindTCPStore(&m);
BindJitProperty(&m);
py::class_<framework::LoDRankTable>(m, "LodRankTable")
.def("items", [](framework::LoDRankTable &table) {
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy
import unittest
import paddle
class TestPropertySave(unittest.TestCase):
    """Tests for the JIT ``Property`` binding: setters, float getters and the
    serialize/parse round trip.
    """

    def setUp(self):
        # ``a`` is populated directly; ``b`` is rebuilt from a's serialized
        # bytes so the tests can compare both sides of a round trip.
        a = paddle.framework.core.Property()
        a.set_float('a', 1.0)
        a.set_floats('b', [1.02, 2.3, 4.23])

        b = paddle.framework.core.Property()
        b.parse_from_string(a.serialize_to_string())

        self.a = a
        self.b = b

    def test_property_save(self):
        """A float survives serialization and is reachable by name or index."""
        self.assertEqual(self.a.get_float('a'), self.b.get_float('a'))
        self.assertEqual(self.a.get_float(0), 1.0)

    def test_size(self):
        """Both the original and the round-tripped property hold two entries."""
        self.assertEqual(self.b.size(), 2)
        self.assertEqual(self.a.size(), 2)

    def test_load_float(self):
        """Reading a non-float entry (index 1 is a float list) raises."""
        with self.assertRaises(ValueError):
            self.a.get_float(1)

    def test_set_float_wo_name(self):
        """test save without name
        """
        a = paddle.framework.core.Property()
        a.set_float(10.0)
        self.assertEqual(a.get_float(0), 10.0)

    def test_set(self):
        """test property set.

        Every setter overload is called once. Exceptions are allowed to
        propagate so a failure reports the real error and traceback rather
        than an opaque ``False != True``.
        """
        a = paddle.framework.core.Property()
        a.set_float(10.0)
        a.set_float('float', 10.0)
        a.set_floats([5.0, 4.0, 3.0])
        a.set_floats('floats', [5.0, 4.0, 3.0])
        a.set_int(5)
        a.set_int('int', 5)
        a.set_ints([1, 2, 3])
        a.set_ints('ints', [1, 2, 3])
        a.set_string("hello")
        a.set_string("str", "hello")
        a.set_strings(["1", "2", "3"])
        a.set_strings('strs', ["1", "2", "3"])

        # Each of the 12 calls above appends exactly one entry.
        self.assertEqual(a.size(), 12)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册