From a76f7ed2eb02c9beb5e7b16ff7fede0f13477df8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 14 Jul 2017 15:04:44 +0800 Subject: [PATCH] Get OpProtos in Python * PyBind and SWIG of paddle cannot be load in a single Python process, lazy import all SWIG library of Paddle. Otherwise, the glog, gflags are imported twice in a same Python process. * Note that all PyBind11 return C++ std::string as an unicode. For protobuf, it is need be cast to `str` before use them. * Add unit test for Get `OpProtos` --- paddle/pybind/pybind.cc | 7 ++++++- python/paddle/v2/__init__.py | 4 +--- python/paddle/v2/data_feeder.py | 1 - python/paddle/v2/event.py | 3 +-- .../paddle/v2/framework/create_op_creation_methods.py | 3 +-- python/paddle/v2/inference.py | 4 ++-- python/paddle/v2/optimizer.py | 5 +++-- python/paddle/v2/parameters.py | 5 +++-- python/paddle/v2/trainer.py | 11 +++++------ python/setup.py.in | 3 ++- 10 files changed, 24 insertions(+), 22 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 6a1e9291cb4..c1a025ed049 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -16,6 +16,8 @@ limitations under the License. */ #include #include #include +#include +#include #include namespace py = pybind11; @@ -47,11 +49,14 @@ All parameter, weight, gradient are variables in Paddle. &pd::Scope::CreateVariable, py::return_value_policy::reference); + //! @note: Be careful! PyBind will return std::string as an unicode, not + //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { auto& protos = pd::OpRegistry::protos(); std::vector ret_values; - ret_values.reserve(protos.size()); for (auto it = protos.begin(); it != protos.end(); ++it) { + PADDLE_ENFORCE(it->second.IsInitialized(), + "OpProto must all be initialized"); ret_values.emplace_back(); PADDLE_ENFORCE(it->second.SerializeToString(&ret_values.back()), "Serialize OpProto Error. This could be a bug of Paddle."); diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 3ba5c318718..3c75ca4c3ab 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,7 +20,6 @@ import trainer import event import data_type import topology -import data_feeder import networks import evaluator from . import dataset @@ -31,7 +30,6 @@ import op import pooling import inference import networks -import py_paddle.swig_paddle as api import minibatch import plot import image @@ -47,7 +45,6 @@ __all__ = [ 'data_type', 'attr', 'pooling', - 'data_feeder', 'dataset', 'reader', 'topology', @@ -61,6 +58,7 @@ __all__ = [ def init(**kwargs): + import py_paddle.swig_paddle as api args = [] args_dict = {} # NOTE: append arguments if they are in ENV diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 2698251b9e1..98dfb85a0ea 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from py_paddle import DataProviderConverter import collections import paddle.trainer.PyDataProvider2 as pydp2 diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index fd6050fa339..7589cc9917f 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -9,8 +9,6 @@ There are: * BeginPass * EndPass """ -import py_paddle.swig_paddle as api - __all__ = [ 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult' ] @@ -18,6 +16,7 @@ __all__ = [ class WithMetric(object): def __init__(self, evaluator): + import py_paddle.swig_paddle as api if not isinstance(evaluator, api.Evaluator): raise TypeError("Evaluator should be api.Evaluator type") self.__evaluator__ = evaluator diff --git a/python/paddle/v2/framework/create_op_creation_methods.py b/python/paddle/v2/framework/create_op_creation_methods.py index 14beaadc9a9..2fcdfead254 100644 --- a/python/paddle/v2/framework/create_op_creation_methods.py +++ b/python/paddle/v2/framework/create_op_creation_methods.py @@ -6,7 +6,6 @@ def get_all_op_protos(): protostrs = core.get_all_op_protos() ret_values = [] for pbstr in protostrs: - op_proto = op_proto_pb2.OpProto() - op_proto.ParseFromString(pbstr) + op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) ret_values.append(op_proto) return ret_values diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 34b73086013..40134a3270c 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -1,9 +1,7 @@ import numpy -import py_paddle.swig_paddle as api import collections import topology import minibatch -from data_feeder import DataFeeder __all__ = ['infer', 'Inference'] @@ -28,6 +26,7 @@ class Inference(object): """ def __init__(self, output_layer, parameters): + import py_paddle.swig_paddle as api topo = topology.Topology(output_layer) gm = api.GradientMachine.createFromConfigProto( topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) @@ -40,6 +39,7 @@ class Inference(object): self.__data_types__ = topo.data_type() def iter_infer(self, input, feeding=None): + from data_feeder import DataFeeder feeder = DataFeeder(self.__data_types__, feeding) batch_size = len(input) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 390c22ee552..3dec340cfb3 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,5 +1,3 @@ -import py_paddle.swig_paddle as swig_api - import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.optimizers as v1_optimizers """ @@ -26,6 +24,8 @@ class Optimizer(object): self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( __impl__) + if swig_api is None: + raise RuntimeError("paddle.v2 currently need swig_paddle") self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( self.__opt_conf_proto__) @@ -268,6 +268,7 @@ ModelAverage = v1_optimizers.ModelAverage L2Regularization = v1_optimizers.L2Regularization if __name__ == '__main__': + import py_paddle.swig_paddle as swig_api swig_api.initPaddle('--use_gpu=false') for opt in [ Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index bbaf8bfa979..a9cba8ca0b1 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,5 +1,4 @@ import numpy as np -import py_paddle.swig_paddle as api from paddle.proto.ParameterConfig_pb2 import ParameterConfig import paddle.trainer.config_parser as cp import struct @@ -124,6 +123,7 @@ class Parameters(object): :return: parameter value :rtype: np.ndarray """ + import py_paddle.swig_paddle as api shape = self.get_shape(key) if len(self.__gradient_machines__) == 0: @@ -223,7 +223,7 @@ class Parameters(object): :type gradient_machine: api.GradientMachine :return: """ - + import py_paddle.swig_paddle as api if not isinstance(gradient_machine, api.GradientMachine): raise ValueError("gradient_machine should be api.GradientMachine") @@ -359,6 +359,7 @@ def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): :return: :rtype: api.Parameter """ + import py_paddle.swig_paddle as api param = __get_parameter_in_gradient_machine__(gradient_machine, name) vec = param.getBuf(api.PARAMETER_VALUE) assert isinstance(vec, api.Vector) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 96c6c4b89a2..92fdf98e903 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -2,12 +2,6 @@ Module Trainer """ import collections -import gzip -import os - -import py_paddle.swig_paddle as api - -from data_feeder import DataFeeder from topology import Topology from . import event as v2_event from . import optimizer as v2_optimizer @@ -59,6 +53,7 @@ class SGD(object): if not isinstance(update_equation, v2_optimizer.Optimizer): raise TypeError("update equation parameter must be " "paddle.v2.optimizer.Optimizer") + import py_paddle.swig_paddle as api topology = Topology(cost, extra_layers=extra_layers) self.__optimizer__ = update_equation self.__topology__ = topology @@ -124,6 +119,8 @@ class SGD(object): :type feeding: dict|list :return: """ + import py_paddle.swig_paddle as api + from data_feeder import DataFeeder if event_handler is None: event_handler = default_event_handler __check_train_args__(**locals()) @@ -187,6 +184,8 @@ class SGD(object): :type feeding: dict :return: """ + import py_paddle.swig_paddle as api + from data_feeder import DataFeeder feeder = DataFeeder(self.__data_types__, feeding) evaluator = self.__gradient_machine__.makeEvaluator() out_args = api.Arguments.createArguments(0) diff --git a/python/setup.py.in b/python/setup.py.in index 271ee6e5526..b1041f6102a 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -19,7 +19,8 @@ setup_requires=["requests", "recordio", "matplotlib", "rarfile", - "scipy>=0.19.0"] + "scipy>=0.19.0", + "nltk"] if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"] -- GitLab