diff --git a/CMakeLists.txt b/CMakeLists.txt index bbf3acb8ad26f59e4ec99db4c350e060c8368975..b4a700f974405094cbc78fb8326a8c8e40b7b181 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) -option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) @@ -176,7 +175,6 @@ include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas include(external/mkldnn) # download, build, install mkldnn include(external/ngraph) # download, build, install nGraph -include(external/swig) # download, build, install swig include(external/boost) # download boost include(external/any) # download libn::any include(external/eigen) # download eigen3 diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake deleted file mode 100644 index de07703695eb14e76eedd3758d55cb98edd1e02b..0000000000000000000000000000000000000000 --- a/cmake/external/swig.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -IF(NOT WITH_SWIG_PY) - return() -ENDIF() - -FIND_PACKAGE(SWIG) - -IF(NOT SWIG_FOUND) - # build swig as an external project - INCLUDE(ExternalProject) - - SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig) - SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig) - SET(SWIG_TARGET_VERSION "3.0.2") - SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41") - SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f") - - IF(WIN32) - # swig.exe available as pre-built binary on Windows: - ExternalProject_Add(swig - URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip - URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5} - SOURCE_DIR ${SWIG_SOURCES_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - UPDATE_COMMAND "" - ) - SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE) - SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE) - ELSE(WIN32) - # swig uses bison find it by cmake and pass it down - FIND_PACKAGE(BISON) - - # From SWIG configure - ExternalProject_Add(swig - GIT_REPOSITORY https://github.com/swig/swig.git - GIT_TAG rel-3.0.10 - PREFIX ${SWIG_SOURCES_DIR} - CONFIGURE_COMMAND cd && ./autogen.sh && ./configure - --prefix=${SWIG_INSTALL_DIR} --without-pcre - BUILD_COMMAND cd && make - INSTALL_COMMAND cd && make install - UPDATE_COMMAND "" - ) - - SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION}) - SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig) - ENDIF(WIN32) - - LIST(APPEND external_project_dependencies swig) -ENDIF(NOT SWIG_FOUND) diff --git a/paddle/py_paddle/.gitignore b/paddle/py_paddle/.gitignore deleted file mode 100644 index 80d1f76fbc05627e21e334af55d63a4a534434c6..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -swig_paddle.py -_swig_paddle.so diff --git a/paddle/py_paddle/__init__.py b/paddle/py_paddle/__init__.py deleted file mode 100644 index 5504d1d50c523315036bfaaf6641c5216269a5e5..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from util import DataProviderWrapperConverter -from dataprovider_converter import DataProviderConverter - -__all__ = [ - 'paddle', - 'DataProviderConverter', - 'DataProviderWrapperConverter', # for deprecated usage. - 'loadParameterFile' -] -util.monkeypatches() diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py deleted file mode 100644 index 43614b9779d21795f1f274589ea93639e923ce75..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/dataprovider_converter.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer.PyDataProvider2 as dp2 -import collections -import swig_paddle -import numpy -import itertools -from functools import reduce - -__all__ = ['DataProviderConverter'] - - -class IScanner(object): - """ - The scanner will scan Python object two passes, then convert it to Paddle's - argument. - - In the first pass, `pre_scan` will be invoked by every data instance, and - then invoke `finish_pre_scan` to arguments. And the second pass do the same - thing except the functions changed to `scan`, `finish_scan`. - - During the first pass, a scanner may count the shape of input matrix and - allocate memory for this argument. Then fill the data into this argument - in second pass. - """ - - def __init__(self, input_type, pos): - self.input_type = input_type - if not isinstance(self.input_type, dp2.InputType): - raise ValueError("input type should be dataprovider2.InputType") - self.pos = pos - # data_in_gpu is used to indicate whether to create argument on GPU - # or not in GPU mode. Now if using one thread (trainer_count=1), - # trainer uses NeuralNetwork which needs to create argument on GPU - # before calling forward function. So, set data_in_gpu to True. - # Otherwise, trainer uses MultiGradientMachine which will transfer - # data from CPU to GPU in the forward function, set data_in_gpu to - # False in this case. - self.data_in_gpu = swig_paddle.isUsingGpu( - ) and swig_paddle.getTrainerCount() == 1 - - def pre_scan(self, dat): - """ - First pass scan method. During this method, the scanner could count the - data number, and get the total memory size this batch would use. - - :param dat: The python object. - """ - pass - - def finish_pre_scan(self, argument): - """ - Finish first scan pass. Allocate the memory. - - :param argument: Output arguments object. - :type argument: swig_paddle.Arguments - :param dat: Output arguments object. - :type dat: The Python object, numpy.array or List. - :return: - """ - pass - - def scan(self, dat): - """ - Second pass scan method. Copy the data to arguments. - - :param dat: The python object. - """ - pass - - def finish_scan(self, argument): - """ - Finish second pass. Finalize the resources, etc. - - :param argument: Output arguments object. - :type argument: swig_paddle.Arguments - """ - pass - - -class DenseScanner(IScanner): - """ - :type __mat__: numpy.ndarray - """ - - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__mat__ = None - self.__shape__ = None - self.__height__ = 0 - self.__dim__ = 0 - - def pre_scan(self, dat): - self.__height__ += 1 - if self.__shape__ is None: - self.__shape__ = numpy.array(dat).shape - if len(self.__shape__) > 3: - raise ValueError( - "The dimension of input cannot be greater than 3.") - if len(self.__shape__) == 0: - raise ValueError( - "The input should be a vector, please check your input data." - ) - self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) - if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: - raise ValueError( - "The data size must be equal to it in data layer.") - else: - if self.__shape__ != numpy.array(dat).shape: - raise ValueError( - "The data shape must be same in one mini-batch.") - - def finish_pre_scan(self, argument): - self.__mat__ = numpy.ndarray( - shape=(self.__height__, self.__dim__), dtype=numpy.float32) - self.__height__ = 0 - - def scan(self, dat): - # It's better to use NumPy array for speed. - dat = numpy.array(dat) - dat = dat.flatten() - self.__mat__[self.__height__] = dat - self.__height__ += 1 - - def finish_scan(self, argument): - assert isinstance(argument, swig_paddle.Arguments) - if self.__mat__.dtype != numpy.float32: - self.__mat__ = self.__mat__.astype(numpy.float32) - m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, - self.data_in_gpu) - argument.setSlotValue(self.pos, m) - if len(self.__shape__) > 1: - # The last-two dimenstions are the frame height and width. - # For example, the layout is CHW for 3-D feature of image. - # The H and W are the frame height and width. - h, w = self.__shape__[-2:] - argument.setSlotFrameHeight(self.pos, h) - argument.setSlotFrameWidth(self.pos, w) - self.__shape__ = None - - -class SparseBinaryScanner(IScanner): - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__rows__ = [0] - self.__cols__ = [] - self.__height__ = 0 - self.__value__ = [] - - def scan(self, dat): - self.extend_cols(dat) - self.__rows__.append(len(self.__cols__)) - self.__height__ += 1 - - def extend_cols(self, dat): - self.__cols__.extend(dat) - - def finish_scan(self, argument): - assert isinstance(argument, swig_paddle.Arguments) - m = swig_paddle.Matrix.createSparse( - self.__height__, - self.input_type.dim, - len(self.__cols__), - len(self.__value__) == 0, - False, # trans - False) # TODO supoort GPU - assert isinstance(m, swig_paddle.Matrix) - m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__) - argument.setSlotValue(self.pos, m) - - -class SparseFloatScanner(SparseBinaryScanner): - def __init__(self, input_type, pos): - SparseBinaryScanner.__init__(self, input_type, pos) - - def extend_cols(self, dat): - self.__cols__.extend((x[0] for x in dat)) - self.__value__.extend((x[1] for x in dat)) - - -class IndexScanner(IScanner): - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__ids__ = None - self.__idx__ = 0 - - def pre_scan(self, dat): - self.__idx__ += 1 - - def finish_pre_scan(self, argument): - self.__ids__ = [0] * self.__idx__ - self.__idx__ = 0 - - def scan(self, dat): - self.__ids__[self.__idx__] = dat - self.__idx__ += 1 - - def finish_scan(self, argument): - ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu) - assert isinstance(argument, swig_paddle.Arguments) - argument.setSlotIds(self.pos, ids) - - -class SequenceScanner(IScanner): - def __init__(self, input_type, pos, inner_scanner, setter): - IScanner.__init__(self, input_type, pos) - self.__seq__ = [0] - self.__inner_scanner__ = inner_scanner - self.__setter__ = setter - - def pre_scan(self, dat): - for each in dat: - self.__inner_scanner__.pre_scan(each) - - def finish_pre_scan(self, argument): - self.__inner_scanner__.finish_pre_scan(argument) - - def scan(self, dat): - self.__seq__.append(self.__seq__[-1] + self.get_size(dat)) - for each in dat: - self.__inner_scanner__.scan(each) - - def finish_scan(self, argument): - seq = swig_paddle.IVector.create(self.__seq__, False) - self.__setter__(argument, self.pos, seq) - self.__inner_scanner__.finish_scan(argument) - - def get_size(self, dat): - if isinstance(self.__inner_scanner__, SequenceScanner): - return sum(self.__inner_scanner__.get_size(item) for item in dat) - else: - return len(dat) - - -class DataProviderConverter(object): - def __init__(self, input_types): - self.input_types = input_types - assert isinstance(self.input_types, collections.Sequence) - for each in self.input_types: - assert isinstance(each, dp2.InputType) - - def convert(self, dat, argument=None): - if argument is None: - argument = swig_paddle.Arguments.createArguments(0) - assert isinstance(argument, swig_paddle.Arguments) - argument.resize(len(self.input_types)) - - scanners = [ - DataProviderConverter.create_scanner(i, each_type) - for i, each_type in enumerate(self.input_types) - ] - - for each_sample in dat: - for each_step, scanner in itertools.izip(each_sample, scanners): - scanner.pre_scan(each_step) - - for scanner in scanners: - scanner.finish_pre_scan(argument) - - for each_sample in dat: - for each_step, scanner in itertools.izip(each_sample, scanners): - scanner.scan(each_step) - - for scanner in scanners: - scanner.finish_scan(argument) - - return argument - - def __call__(self, dat, argument=None): - return self.convert(dat, argument) - - @staticmethod - def create_scanner(i, each): - assert isinstance(each, dp2.InputType) - retv = None - if each.type == dp2.DataType.Dense: - retv = DenseScanner(each, i) - elif each.type == dp2.DataType.Index: - retv = IndexScanner(each, i) - elif each.type == dp2.DataType.SparseNonValue: - retv = SparseBinaryScanner(each, i) - elif each.type == dp2.DataType.SparseValue: - retv = SparseFloatScanner(each, i) - assert retv is not None - - if each.seq_type == dp2.SequenceType.SUB_SEQUENCE: - retv = SequenceScanner( - each, i, retv, - lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq)) - - if each.seq_type in [ - dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE - ]: - retv = SequenceScanner( - each, i, retv, - lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq)) - return retv diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py deleted file mode 100644 index 3ae8dbf964c68c6f01ba30cb3ac69fb6c2f08c30..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/util.py +++ /dev/null @@ -1,578 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Some Useful method for py_paddle. -""" - -import swig_paddle -import os -import paddle.trainer.PyDataProviderWrapper -import paddle.proto.ParameterConfig_pb2 -import paddle.proto.ModelConfig_pb2 -import paddle.proto.TrainerConfig_pb2 -import weakref -import numpy -import struct -import sys -import copy - - -def initializePaddle(*args): - """ - To initialize paddle process. - :param args: Command line options, such as --use_gpu=0, etc. - :return: Nothing. - """ - old_argv = copy.deepcopy(sys.argv) - old_pypath = os.getenv("PYTHONPATH") - pypath = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) - if old_pypath is not None: - pypath = os.pathsep.join([pypath, old_pypath]) - os.putenv("PYTHONPATH", pypath) - args = [""] + list(args) # argv[0] is command name, it is not important. - swig_paddle.__initPaddle__(args) - sys.argv = old_argv - - -def __monkeypatch_init_paddle__(): - swig_paddle.__initPaddle__ = swig_paddle.initPaddle - swig_paddle.initPaddle = initializePaddle - - -class __ParameterCallbackWrapper__(swig_paddle.UpdateCallback): - """ - Wrap the python callable object to paddle.UpdateCallback. - - INTERNAL USE ONLY. - """ - - def __init__(self, callback): - swig_paddle.UpdateCallback.__init__(self) - self.callback = callback - - def apply(self, param): - self.callback(param) - - @staticmethod - def wrap(callback): - """ - Cast the python callable object/paddle.UpdateCallback to - swig_paddle.UpdateCallback.__disown__ - :param callback: callable or swig_paddle.UpdateCallback object. - """ - if isinstance(callback, swig_paddle.UpdateCallback): - return callback.__disown__() - elif isinstance(callback, weakref.ProxyType): - raise RuntimeError("Should not pass __disown__ object") - else: - return __ParameterCallbackWrapper__(callback).__disown__() - - -def __arguments_to_numpy__(i, arg): - assert isinstance(arg, swig_paddle.Arguments) - value = arg.getSlotValue(i) - ids = arg.getSlotIds(i) - prob = arg.getSlotIn(i) - if value is not None: - assert isinstance(value, swig_paddle.Matrix) - value = value.copyToNumpyMat() - if ids is not None: - assert isinstance(ids, swig_paddle.IVector) - ids = ids.copyToNumpyArray() - if prob is not None: - assert isinstance(prob, swig_paddle.Matrix) - prob = prob.copyToNumpyMat() - return {"value": value, "id": ids, "prob": prob} - - -def __monkeypatch_gradient_machine__(): - """ - Add some class methods to GradientMachine. - This method should be only used internally. - """ - swig_paddle.GradientMachine.loadFromConfigFile = \ - staticmethod(loadGradientMachine) - - def __matrix_to_numpy__(m): - if isinstance(m, swig_paddle.Matrix): - return m.copyToNumpyMat() - elif isinstance(m, swig_paddle.IVector): - return m.copyToNumpyArra() - else: - raise RuntimeError("Input arg should be matrix or vecotr.") - - def createFromConfigProto(protoObj, - createMode=swig_paddle.CREATE_MODE_NORMAL, - paramTypes=[ - swig_paddle.PARAMETER_VALUE, - swig_paddle.PARAMETER_GRADIENT, - swig_paddle.PARAMETER_MOMENTUM - ]): - """ - Create Gradient Machine From Proto object. - :param protoObj: Model config - :type protoObj: proto.ModelConfig_pb2.ModelConfig - :param createMode: Create Mode, default is normal. - :type createMode: int - :param paramTypes: the gradient machine parameter type. - :type paramTypes: list of int - :return: paddle.GradientMachine - """ - assert isinstance(protoObj, paddle.proto.ModelConfig) - return swig_paddle.GradientMachine.createByConfigProtoStr( - protoObj.SerializeToString(), createMode, paramTypes) - - swig_paddle.GradientMachine.createFromConfigProto = \ - staticmethod(createFromConfigProto) - - def forwardTest(self, inArgs): - """ - forwardTest. forward gradient machine in test mode, and return a numpy - matrix dict. - - :param inArgs: The input arguments - :type inArgs: paddle.Arguments - :return: A dictionary with keys ['id', 'value'], each value is a - numpy.ndarray. - """ - outArgs = swig_paddle.Arguments.createArguments(0) - self.forward(inArgs, outArgs, swig_paddle.PASS_TEST) - return [ - __arguments_to_numpy__(i, outArgs) - for i in xrange(outArgs.getSlotNum()) - ] - - swig_paddle.GradientMachine.forwardTest = forwardTest - - # Monkey patching backward - swig_paddle.GradientMachine.__backward__ = swig_paddle.GradientMachine.backward - - def backward(self, callback): - """ - GradientMachine Backward - :param callback: a callback which parameter is (paddle.Parameter) or - a paddle.UpdateCallback object. - """ - self.__backward__(__ParameterCallbackWrapper__.wrap(callback)) - - swig_paddle.GradientMachine.backward = backward - - # Monkey patching forwardBackward. - swig_paddle.GradientMachine.__forwardBackward__ = \ - swig_paddle.GradientMachine.forwardBackward - - def forwardBackward(self, - inArgs, - outArgs, - passType, - callback=swig_paddle.UpdateCallback()): - """ - GradientMachine forward backward. - :param inArgs: Input Arguments for GradientMachine. - :type inArgs: paddle.Arguments - :param outArgs: Output Arguments for GradientMachine. - :type outArgs: paddle.Arguments - :param passType: gradient machine's pass type. - :type passType: paddle.PassType - :param callback: a callable object with arguments (paddle.Parameter) or - a paddle.UpdateCallback it will be called when - backward - """ - self.__forwardBackward__(inArgs, outArgs, passType, - __ParameterCallbackWrapper__.wrap(callback)) - - swig_paddle.GradientMachine.forwardBackward = forwardBackward - - def getParameters(self): - return (self.getParameter(i) for i in xrange(self.getParameterSize())) - - swig_paddle.GradientMachine.getParameters = getParameters - - def getNonStaticParameters(self): - return (self.getNonStaticParameter(i) - for i in xrange(self.getNonStaticParameterSize())) - - swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters - - def getLayerOutputs(self, layerNames): - """ - getLayerOutputs. get outputs of layers and return a numpy matrix dict. - :param layerNames: layer names. - :type layerNames: string or list. - """ - if isinstance(layerNames, basestring): - layerNames = [layerNames] - elif not isinstance(layerNames, list): - raise RuntimeError("Input args shuld be string or a sting list.") - - output = dict() - for name in layerNames: - output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name)) - return output - - swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs - - -def loadGradientMachine(config_filename, model_dir=None): - """ - Load a gradient machine from config file name/path. - :param config_filename: The trainer config file name/path - :param model_dir: The model parameter directory. None if same as the - directory of config_filename - :return: GradientMachine with some enhance methods. - :rtype: paddle.GradientMachine - """ - trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile( - config_filename) - assert isinstance(trainer_config, swig_paddle.TrainerConfig) - model_conf = trainer_config.getModelConfig() - network = swig_paddle.GradientMachine.createByModelConfig(model_conf) - assert isinstance(network, swig_paddle.GradientMachine) - if model_dir is None: - model_dir = os.path.dirname(config_filename) - network.loadParameters(model_dir) - return network - - -def loadParameterFile(fn): - """ - Load Paddle Parameter file to numpy.ndarray - :param fn: file name or file like object. - :type fn: str or file like object. - :return: numpy array - :rtype: numpy.ndarray - :raise: paddle.UnsupportError when parameter format is wrong. - """ - if isinstance(fn, str): - with open(fn, 'rb') as f: - return loadParameterFile(f) - elif hasattr(fn, 'read'): # File like object - version, = struct.unpack('i', fn.read(4)) - if version != 0: - raise swig_paddle.UnsupportError() - value_length, = struct.unpack("I", fn.read(4)) - if value_length != 4 and value_length != 8: - raise swig_paddle.UnsupportError() - dtype = 'float32' if value_length == 4 else 'float64' - param_size, = struct.unpack("L", fn.read(8)) - value = numpy.fromfile(fn, dtype) - if len(value) != param_size: - raise swig_paddle.UnsupportError() - return value - else: - raise swig_paddle.UnsupportError() - - -class DataProviderWrapperConverter(object): - """ - A class convert DataFormat from PyDataProvider Wrapper to - py_paddle.paddle.Arguemnts. - """ - - class DenseValueConverter(object): - """ - Internal class - """ - - def __init__(self, header_def): - self.__dim__ = header_def.dim - self.buf = [] - - def append(self, other): - assert len(other) == self.__dim__ - self.buf += other - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createDense(self.buf, - len(self.buf) / self.__dim__, - self.__dim__) - arg.setSlotValue(slot_idx, mat) - - class IdValueConverter(object): - """ - Internal class - """ - - def __init__(self, *args): - self.buf = [] - - def append(self, other): - assert isinstance(other, int) - self.buf.append(other) - - def __call__(self, slot_idx, arg): - arg.setSlotIds(slot_idx, swig_paddle.IVector.create(self.buf)) - - class SparseNonValueConverter(object): - """ - Internal class - """ - - def __init__(self, slot_def): - self.indices = [0] - self.cols = [] - self.dim = slot_def.dim - - def append(self, other): - self.indices.append(self.indices[-1] + len(other)) - self.cols += other - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse( - len(self.indices) - 1, self.dim, len(self.cols), True) - assert isinstance(mat, swig_paddle.Matrix) - mat.sparseCopyFrom(self.indices, self.cols) - self.putIntoArg(slot_idx, arg, mat) - - def putIntoArg(self, slot_idx, arg, mat): - arg.setSlotValue(slot_idx, mat) - - class SparseValueConverter(SparseNonValueConverter): - """ - Internal class - """ - - def __init__(self, slot_def): - super(DataProviderWrapperConverter.SparseValueConverter, - self).__init__(slot_def) - self.values = [] - - def append(self, other): - super(DataProviderWrapperConverter.SparseValueConverter, - self).append(map(lambda x: x[0], other)) - self.values += map(lambda x: x[1], other) - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse( - len(self.indices) - 1, self.dim, len(self.cols), False) - assert isinstance(mat, swig_paddle.Matrix) - mat.sparseCopyFrom(self.indices, self.cols, self.values) - self.putIntoArg(slot_idx, arg, mat) - - __SLOT_VALUE_CONVERTER_MAP__ = { - paddle.trainer.PyDataProviderWrapper.DenseSlot: DenseValueConverter, - paddle.trainer.PyDataProviderWrapper.IndexSlot: IdValueConverter, - paddle.trainer.PyDataProviderWrapper.SparseNonValueSlot: - SparseNonValueConverter, - paddle.trainer.PyDataProviderWrapper.SparseValueSlot: - SparseValueConverter - } - - def __init__(self, use_seq, header): - """ - Ctor - :param use_seq: True if use sequence. - :param header: List of slots type, - trainer.PyDataProviderWrapper.SlotType - """ - self.__use_seq__ = use_seq - self.__header__ = header - - def convert(self, wrapper_data, argument=None): - """ - Convert PyDataProviderWrapper format to paddle.Argument - :param wrapper_data: PyDataProviderWrapper yield's data list. - :param argument: The output paddle.Arguments. - If it is not None, it will assign data in this - arguments, else it will create new arguments. - :return: arguments that contains data. - :rtype: paddle.Arguments - """ - if argument is None: - argument = swig_paddle.Arguments.createArguments(0) - assert isinstance(argument, swig_paddle.Arguments) - argument.resize(len(self.__header__)) - - values = map( - lambda x: DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[x.__class__](x), - self.__header__) - - if self.__use_seq__: - seq_dim = [[] for _ in xrange(self.__header__.__len__())] - seq_start_pos = [[0] for _ in xrange(self.__header__.__len__())] - - for each_sample in wrapper_data: - for slot_idx, sequence in enumerate(each_sample): - for raw_data in sequence: - values[slot_idx].append(raw_data) - seq_start_pos[slot_idx].append(seq_start_pos[slot_idx][-1] + - len(sequence)) - seq_dim[slot_idx].append(len(sequence)) - - for slot_idx in xrange(len(self.__header__)): - argument.setSlotSequenceDim( - slot_idx, swig_paddle.IVector.create(seq_dim[slot_idx])) - argument.setSlotSequenceStartPositions( - slot_idx, - swig_paddle.IVector.create(seq_start_pos[slot_idx])) - else: - for each_sample in wrapper_data: - for raw_data, value in zip(each_sample, values): - value.append(raw_data) - - for i, v in enumerate(values): - v(i, argument) - - return argument - - def __call__(self, wrapper_data, argument=None): - """ - Invoke self.convert. See documents in self.convert. - """ - return self.convert(wrapper_data, argument) - - -def __monkey_patch_protobuf_objects__(): - def ParameterConfig_toProto(self): - """ - Convert paddle.ParameterConfig to - proto.ParameterConfig_pb2.ParameterConfig - - :return: proto.ParameterConfig_pb2.ParameterConfig object. - """ - param_conf = paddle.proto.ParameterConfig_pb2.ParameterConfig() - param_conf.ParseFromString(self.toProtoString()) - return param_conf - - swig_paddle.ParameterConfig.toProto = ParameterConfig_toProto - - def OptimizationConfig_toProto(self): - """ - Convert paddle.OptimizationConfig to - proto.TrainerConfig_pb2.OptimizationConfig - - :return: proto.TrainerConfig_pb2.OptimizationConfig - """ - opt_conf = proto.TrainerConfig_pb2.OptimizationConfig() - opt_conf.ParseFromString(self.toProtoString()) - return opt_conf - - swig_paddle.OptimizationConfig.toProto = OptimizationConfig_toProto - - def OptimizationConfig_createFromProto(protoObj): - """ - Create a new paddle.OptimizationConfig from - proto.TrainerConfig_pb2.OptimizationConfig - - :param protoObj: proto.TrainerConfig_pb2.OptimizationConfig - :return: paddle.OptimizationConfig - """ - - assert isinstance(protoObj, paddle.proto.OptimizationConfig) - return swig_paddle.OptimizationConfig.createFromProtoString( - protoObj.SerializeToString()) - - swig_paddle.OptimizationConfig.createFromProto = staticmethod( - OptimizationConfig_createFromProto) - - def TrainerConfig_createFromProto(protoObj): - """ - Create a new paddle.TrainerConfig from - proto.OptimizationConfig - - :param protoObj: proto.TrainerConfig - :return: paddle.TrainerConfig - """ - assert isinstance(protoObj, paddle.proto.TrainerConfig) - return swig_paddle.TrainerConfig.createFromProtoString( - protoObj.SerializeToString()) - - swig_paddle.TrainerConfig.createFromProto = staticmethod( - TrainerConfig_createFromProto) - - -def __monkey_patch_parameter__(): - def getBufs(self): - """ - get all parameter vectors. - NOTE: the return value is a generator. Maybe you need to cast to - list or tuple or something else. - - :return: generator of all parameter vectors. - :rtype: generator - """ - return (self.getBuf(i) for i in xrange(swig_paddle.NUM_PARAMETER_TYPES)) - - swig_paddle.Parameter.getBufs = getBufs - - -def __monkey_patch_trainer__(): - swig_paddle.Trainer.__create__ = staticmethod(swig_paddle.Trainer.create) - - def Trainer_create(config, model=None): - """ - Create a trainer for model with TrainerCOnfig trainer_config - trainer_config.model_config will be ignored when model is supplied. - Trainer.trainOneBatch() and Trainer.forwardOneBatch() can be used only - when trainer_config.data_config is set. - - A typical usage for Trainer is: - .. code-block:: python - trainer = Trainer.create(trainer_config, model) - for p in xrange(num_passes) - while True: - data = get_next_batch(batch_size) - if not data: - break - trainer.trainOneDataBatch(batch_size, data) - trainer.finishTrainPass() - trainer.finishTrain() - - The trainer will take care of logging, model saving, distributed - training, etc. - - :param config: trainer configuration - :type config: paddle.proto.TrainerConfig - :param model: the model to be trained - :type model: swig_paddle.GradientMachine - :return: a trainer - :rtype swig_paddle.Trainer - - """ - assert isinstance(config, paddle.proto.TrainerConfig) - if model is not None: - assert isinstance(model, swig_paddle.GradientMachine) - return swig_paddle.Trainer.__create__( - swig_paddle.TrainerConfig.createFromProto(config), model) - - swig_paddle.Trainer.create = staticmethod(Trainer_create) - - swig_paddle.Trainer.__getForwardOutput__ = \ - swig_paddle.Trainer.getForwardOutput - - def getForwardOutput(self): - """ - Get the netword outputs from the previous trainOneBatch(), - trainOneDataBatch(), testOneDataPatch(), or forwardOneBatch() call. - - :return: list of dictionary with keys ['id', 'value'], each value is a - numpy.ndarray. - """ - outArgs = self.__getForwardOutput__() - return [ - __arguments_to_numpy__(i, outArgs) - for i in xrange(outArgs.getSlotNum()) - ] - - swig_paddle.Trainer.getForwardOutput = getForwardOutput - - -def monkeypatches(): - patches = [ - __monkeypatch_init_paddle__, __monkeypatch_gradient_machine__, - __monkey_patch_protobuf_objects__, __monkey_patch_parameter__, - __monkey_patch_trainer__ - ] - for patch in patches: - patch() diff --git a/paddle/scripts/README.md b/paddle/scripts/README.md index 277222450648e349cfff33a96fd657f03a02e597..dd3242f62baa152a8977ac8da049323a0f1abcfd 100644 --- a/paddle/scripts/README.md +++ b/paddle/scripts/README.md @@ -68,7 +68,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_MKL` | ON | Build with [IntelĀ® MKL](https://software.intel.com/en-us/mkl) and [IntelĀ® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. | -| `WITH_SWIG_PY` | ON | Build with SWIG python API support. | | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | | `WITH_STYLE_CHECK` | ON | Check the code style when building. | | `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu | diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index cbd39d7a5d9686be71bde6c9fd7b6d03eef73e9a..a06952782b39fdf35e23b039fb21fed75213b354 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -180,7 +180,6 @@ function cmake_gen() { -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DWITH_PYTHON=${WITH_PYTHON:-ON} - -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_TESTING=${WITH_TESTING:-ON} -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake @@ -214,7 +213,6 @@ EOF -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ - -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ -DWITH_TESTING=${WITH_TESTING:-ON} \ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 37ad77549c88166186d9bf354d6bfd4c6bef152c..59e695e6fcb66cbaed1bcc9e861df81b5f73c1ed 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -39,7 +39,6 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE}) IF(WIN32) add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/ - COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/ COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python @@ -48,7 +47,6 @@ ELSE(WIN32) add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND touch stub.cc COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python - COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/ COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python