Commit e000d17a authored by Tao Luo

remove legacy WITH_SWIG_PY option

Parent 561ae9d5
......@@ -49,7 +49,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
......@@ -176,7 +175,6 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn
include(external/ngraph) # download, build, install nGraph
include(external/swig) # download, build, install swig
include(external/boost) # download boost
include(external/any) # download libn::any
include(external/eigen) # download eigen3
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(NOT WITH_SWIG_PY)
return()
ENDIF()
FIND_PACKAGE(SWIG)
IF(NOT SWIG_FOUND)
# build swig as an external project
INCLUDE(ExternalProject)
SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
SET(SWIG_TARGET_VERSION "3.0.2")
SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
IF(WIN32)
# swig.exe available as pre-built binary on Windows:
ExternalProject_Add(swig
URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5}
SOURCE_DIR ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# SWIG needs Bison; locate it via CMake and pass it down.
FIND_PACKAGE(BISON)
# From SWIG configure
ExternalProject_Add(swig
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
ENDIF(WIN32)
LIST(APPEND external_project_dependencies swig)
ENDIF(NOT SWIG_FOUND)
swig_paddle.py
_swig_paddle.so
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from util import DataProviderWrapperConverter
from dataprovider_converter import DataProviderConverter
__all__ = [
'paddle',
'DataProviderConverter',
'DataProviderWrapperConverter', # for deprecated usage.
'loadParameterFile'
]
util.monkeypatches()
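
For context, the removed `py_paddle` package above was the SWIG-based Python entry point. A minimal sketch of how it was typically initialized, assuming the package is importable (the flag values are illustrative, not mandated by this commit):

```python
# A minimal sketch of the legacy py_paddle entry point removed here.
from py_paddle import swig_paddle

# initPaddle is monkey-patched (see util.py below) to accept command-line
# style flags and to restore sys.argv afterwards.
swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1")
```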
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.trainer.PyDataProvider2 as dp2
import collections
import swig_paddle
import numpy
import itertools
from functools import reduce
__all__ = ['DataProviderConverter']
class IScanner(object):
"""
    The scanner scans a Python object in two passes, then converts it to a
    Paddle argument.
    In the first pass, `pre_scan` is invoked for every data instance, followed
    by `finish_pre_scan` on the arguments. The second pass does the same with
    `scan` and `finish_scan`.
    During the first pass, a scanner may count the shape of the input matrix
    and allocate memory for the argument; the data is then copied into the
    argument in the second pass.
"""
def __init__(self, input_type, pos):
self.input_type = input_type
if not isinstance(self.input_type, dp2.InputType):
raise ValueError("input type should be dataprovider2.InputType")
self.pos = pos
# data_in_gpu is used to indicate whether to create argument on GPU
# or not in GPU mode. Now if using one thread (trainer_count=1),
# trainer uses NeuralNetwork which needs to create argument on GPU
# before calling forward function. So, set data_in_gpu to True.
# Otherwise, trainer uses MultiGradientMachine which will transfer
# data from CPU to GPU in the forward function, set data_in_gpu to
# False in this case.
self.data_in_gpu = swig_paddle.isUsingGpu(
) and swig_paddle.getTrainerCount() == 1
def pre_scan(self, dat):
"""
        First-pass scan method, during which the scanner can count the number
        of data items and compute the total memory this batch will use.
:param dat: The python object.
"""
pass
def finish_pre_scan(self, argument):
"""
        Finish the first scan pass and allocate the memory.
        :param argument: Output arguments object.
        :type argument: swig_paddle.Arguments
"""
pass
def scan(self, dat):
"""
Second pass scan method. Copy the data to arguments.
:param dat: The python object.
"""
pass
def finish_scan(self, argument):
"""
Finish second pass. Finalize the resources, etc.
:param argument: Output arguments object.
:type argument: swig_paddle.Arguments
"""
pass
class DenseScanner(IScanner):
"""
:type __mat__: numpy.ndarray
"""
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__mat__ = None
self.__shape__ = None
self.__height__ = 0
self.__dim__ = 0
def pre_scan(self, dat):
self.__height__ += 1
if self.__shape__ is None:
self.__shape__ = numpy.array(dat).shape
if len(self.__shape__) > 3:
raise ValueError(
"The dimension of input cannot be greater than 3.")
if len(self.__shape__) == 0:
raise ValueError(
"The input should be a vector, please check your input data."
)
self.__dim__ = reduce(lambda x, y: x * y, self.__shape__)
if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim:
                raise ValueError(
                    "The data size must equal the dim defined in the data layer.")
else:
if self.__shape__ != numpy.array(dat).shape:
raise ValueError(
"The data shape must be same in one mini-batch.")
def finish_pre_scan(self, argument):
self.__mat__ = numpy.ndarray(
shape=(self.__height__, self.__dim__), dtype=numpy.float32)
self.__height__ = 0
def scan(self, dat):
# It's better to use NumPy array for speed.
dat = numpy.array(dat)
dat = dat.flatten()
self.__mat__[self.__height__] = dat
self.__height__ += 1
def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments)
if self.__mat__.dtype != numpy.float32:
self.__mat__ = self.__mat__.astype(numpy.float32)
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
self.data_in_gpu)
argument.setSlotValue(self.pos, m)
if len(self.__shape__) > 1:
            # The last two dimensions are the frame height and width.
# For example, the layout is CHW for 3-D feature of image.
# The H and W are the frame height and width.
h, w = self.__shape__[-2:]
argument.setSlotFrameHeight(self.pos, h)
argument.setSlotFrameWidth(self.pos, w)
self.__shape__ = None
class SparseBinaryScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__rows__ = [0]
self.__cols__ = []
self.__height__ = 0
self.__value__ = []
def scan(self, dat):
self.extend_cols(dat)
self.__rows__.append(len(self.__cols__))
self.__height__ += 1
def extend_cols(self, dat):
self.__cols__.extend(dat)
def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments)
m = swig_paddle.Matrix.createSparse(
self.__height__,
self.input_type.dim,
len(self.__cols__),
len(self.__value__) == 0,
False, # trans
            False)  # TODO: support GPU
assert isinstance(m, swig_paddle.Matrix)
m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
argument.setSlotValue(self.pos, m)
class SparseFloatScanner(SparseBinaryScanner):
def __init__(self, input_type, pos):
SparseBinaryScanner.__init__(self, input_type, pos)
def extend_cols(self, dat):
self.__cols__.extend((x[0] for x in dat))
self.__value__.extend((x[1] for x in dat))
class IndexScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__ids__ = None
self.__idx__ = 0
def pre_scan(self, dat):
self.__idx__ += 1
def finish_pre_scan(self, argument):
self.__ids__ = [0] * self.__idx__
self.__idx__ = 0
def scan(self, dat):
self.__ids__[self.__idx__] = dat
self.__idx__ += 1
def finish_scan(self, argument):
ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
assert isinstance(argument, swig_paddle.Arguments)
argument.setSlotIds(self.pos, ids)
class SequenceScanner(IScanner):
def __init__(self, input_type, pos, inner_scanner, setter):
IScanner.__init__(self, input_type, pos)
self.__seq__ = [0]
self.__inner_scanner__ = inner_scanner
self.__setter__ = setter
def pre_scan(self, dat):
for each in dat:
self.__inner_scanner__.pre_scan(each)
def finish_pre_scan(self, argument):
self.__inner_scanner__.finish_pre_scan(argument)
def scan(self, dat):
self.__seq__.append(self.__seq__[-1] + self.get_size(dat))
for each in dat:
self.__inner_scanner__.scan(each)
def finish_scan(self, argument):
seq = swig_paddle.IVector.create(self.__seq__, False)
self.__setter__(argument, self.pos, seq)
self.__inner_scanner__.finish_scan(argument)
def get_size(self, dat):
if isinstance(self.__inner_scanner__, SequenceScanner):
return sum(self.__inner_scanner__.get_size(item) for item in dat)
else:
return len(dat)
class DataProviderConverter(object):
def __init__(self, input_types):
self.input_types = input_types
assert isinstance(self.input_types, collections.Sequence)
for each in self.input_types:
assert isinstance(each, dp2.InputType)
def convert(self, dat, argument=None):
if argument is None:
argument = swig_paddle.Arguments.createArguments(0)
assert isinstance(argument, swig_paddle.Arguments)
argument.resize(len(self.input_types))
scanners = [
DataProviderConverter.create_scanner(i, each_type)
for i, each_type in enumerate(self.input_types)
]
for each_sample in dat:
for each_step, scanner in itertools.izip(each_sample, scanners):
scanner.pre_scan(each_step)
for scanner in scanners:
scanner.finish_pre_scan(argument)
for each_sample in dat:
for each_step, scanner in itertools.izip(each_sample, scanners):
scanner.scan(each_step)
for scanner in scanners:
scanner.finish_scan(argument)
return argument
def __call__(self, dat, argument=None):
return self.convert(dat, argument)
@staticmethod
def create_scanner(i, each):
assert isinstance(each, dp2.InputType)
retv = None
if each.type == dp2.DataType.Dense:
retv = DenseScanner(each, i)
elif each.type == dp2.DataType.Index:
retv = IndexScanner(each, i)
elif each.type == dp2.DataType.SparseNonValue:
retv = SparseBinaryScanner(each, i)
elif each.type == dp2.DataType.SparseValue:
retv = SparseFloatScanner(each, i)
assert retv is not None
if each.seq_type == dp2.SequenceType.SUB_SEQUENCE:
retv = SequenceScanner(
each, i, retv,
lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq))
if each.seq_type in [
dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE
]:
retv = SequenceScanner(
each, i, retv,
lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq))
return retv
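
The scanners above implement the two-pass protocol from the `IScanner` docstring: `pre_scan`/`finish_pre_scan` size and allocate each argument slot, then `scan`/`finish_scan` fill it. A hedged usage sketch of the removed converter, assuming the input-type helpers from `paddle.trainer.PyDataProvider2` (the dimensions and values are illustrative):

```python
# Hedged sketch: convert one mini-batch with the removed DataProviderConverter.
from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
from py_paddle import swig_paddle, DataProviderConverter

swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1")

# Two input slots: a 3-d dense feature and an integer label in [0, 10).
converter = DataProviderConverter([dense_vector(3), integer_value(10)])
batch = [([0.1, 0.2, 0.3], 4),
         ([0.4, 0.5, 0.6], 7)]
arguments = converter(batch)  # swig_paddle.Arguments with one slot per input
```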
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Some useful methods for py_paddle.
"""
import swig_paddle
import os
import paddle.trainer.PyDataProviderWrapper
import paddle.proto.ParameterConfig_pb2
import paddle.proto.ModelConfig_pb2
import paddle.proto.TrainerConfig_pb2
import weakref
import numpy
import struct
import sys
import copy
def initializePaddle(*args):
"""
    Initialize the Paddle process.
:param args: Command line options, such as --use_gpu=0, etc.
:return: Nothing.
"""
old_argv = copy.deepcopy(sys.argv)
old_pypath = os.getenv("PYTHONPATH")
pypath = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if old_pypath is not None:
pypath = os.pathsep.join([pypath, old_pypath])
os.putenv("PYTHONPATH", pypath)
args = [""] + list(args) # argv[0] is command name, it is not important.
swig_paddle.__initPaddle__(args)
sys.argv = old_argv
def __monkeypatch_init_paddle__():
swig_paddle.__initPaddle__ = swig_paddle.initPaddle
swig_paddle.initPaddle = initializePaddle
class __ParameterCallbackWrapper__(swig_paddle.UpdateCallback):
"""
Wrap the python callable object to paddle.UpdateCallback.
INTERNAL USE ONLY.
"""
def __init__(self, callback):
swig_paddle.UpdateCallback.__init__(self)
self.callback = callback
def apply(self, param):
self.callback(param)
@staticmethod
def wrap(callback):
"""
        Wrap a Python callable or a swig_paddle.UpdateCallback into a
        disowned swig_paddle.UpdateCallback.
:param callback: callable or swig_paddle.UpdateCallback object.
"""
if isinstance(callback, swig_paddle.UpdateCallback):
return callback.__disown__()
elif isinstance(callback, weakref.ProxyType):
raise RuntimeError("Should not pass __disown__ object")
else:
return __ParameterCallbackWrapper__(callback).__disown__()
def __arguments_to_numpy__(i, arg):
assert isinstance(arg, swig_paddle.Arguments)
value = arg.getSlotValue(i)
ids = arg.getSlotIds(i)
prob = arg.getSlotIn(i)
if value is not None:
assert isinstance(value, swig_paddle.Matrix)
value = value.copyToNumpyMat()
if ids is not None:
assert isinstance(ids, swig_paddle.IVector)
ids = ids.copyToNumpyArray()
if prob is not None:
assert isinstance(prob, swig_paddle.Matrix)
prob = prob.copyToNumpyMat()
return {"value": value, "id": ids, "prob": prob}
def __monkeypatch_gradient_machine__():
"""
Add some class methods to GradientMachine.
    This function should only be used internally.
"""
swig_paddle.GradientMachine.loadFromConfigFile = \
staticmethod(loadGradientMachine)
def __matrix_to_numpy__(m):
if isinstance(m, swig_paddle.Matrix):
return m.copyToNumpyMat()
elif isinstance(m, swig_paddle.IVector):
            return m.copyToNumpyArray()
        else:
            raise RuntimeError("Input arg should be a matrix or a vector.")
def createFromConfigProto(protoObj,
createMode=swig_paddle.CREATE_MODE_NORMAL,
paramTypes=[
swig_paddle.PARAMETER_VALUE,
swig_paddle.PARAMETER_GRADIENT,
swig_paddle.PARAMETER_MOMENTUM
]):
"""
        Create a GradientMachine from a proto object.
:param protoObj: Model config
:type protoObj: proto.ModelConfig_pb2.ModelConfig
:param createMode: Create Mode, default is normal.
:type createMode: int
:param paramTypes: the gradient machine parameter type.
:type paramTypes: list of int
:return: paddle.GradientMachine
"""
assert isinstance(protoObj, paddle.proto.ModelConfig)
return swig_paddle.GradientMachine.createByConfigProtoStr(
protoObj.SerializeToString(), createMode, paramTypes)
swig_paddle.GradientMachine.createFromConfigProto = \
staticmethod(createFromConfigProto)
def forwardTest(self, inArgs):
"""
        Run the gradient machine forward in test mode and return the outputs
        as numpy arrays.
        :param inArgs: The input arguments.
        :type inArgs: paddle.Arguments
        :return: A list of dictionaries with keys ['id', 'value', 'prob'],
                 where each value is a numpy.ndarray.
"""
outArgs = swig_paddle.Arguments.createArguments(0)
self.forward(inArgs, outArgs, swig_paddle.PASS_TEST)
return [
__arguments_to_numpy__(i, outArgs)
for i in xrange(outArgs.getSlotNum())
]
swig_paddle.GradientMachine.forwardTest = forwardTest
# Monkey patching backward
swig_paddle.GradientMachine.__backward__ = swig_paddle.GradientMachine.backward
def backward(self, callback):
"""
GradientMachine Backward
        :param callback: a callable that takes a (paddle.Parameter) argument,
                         or a paddle.UpdateCallback object.
"""
self.__backward__(__ParameterCallbackWrapper__.wrap(callback))
swig_paddle.GradientMachine.backward = backward
# Monkey patching forwardBackward.
swig_paddle.GradientMachine.__forwardBackward__ = \
swig_paddle.GradientMachine.forwardBackward
def forwardBackward(self,
inArgs,
outArgs,
passType,
callback=swig_paddle.UpdateCallback()):
"""
GradientMachine forward backward.
:param inArgs: Input Arguments for GradientMachine.
:type inArgs: paddle.Arguments
:param outArgs: Output Arguments for GradientMachine.
:type outArgs: paddle.Arguments
:param passType: gradient machine's pass type.
:type passType: paddle.PassType
        :param callback: a callable that takes a (paddle.Parameter) argument,
                         or a paddle.UpdateCallback; it is invoked during the
                         backward pass.
"""
self.__forwardBackward__(inArgs, outArgs, passType,
__ParameterCallbackWrapper__.wrap(callback))
swig_paddle.GradientMachine.forwardBackward = forwardBackward
def getParameters(self):
return (self.getParameter(i) for i in xrange(self.getParameterSize()))
swig_paddle.GradientMachine.getParameters = getParameters
def getNonStaticParameters(self):
return (self.getNonStaticParameter(i)
for i in xrange(self.getNonStaticParameterSize()))
swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters
def getLayerOutputs(self, layerNames):
"""
        Get the outputs of the given layers as a dict of numpy matrix dicts.
        :param layerNames: layer name or list of layer names.
        :type layerNames: basestring or list
"""
if isinstance(layerNames, basestring):
layerNames = [layerNames]
elif not isinstance(layerNames, list):
raise RuntimeError("Input args shuld be string or a sting list.")
output = dict()
for name in layerNames:
output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
return output
swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
def loadGradientMachine(config_filename, model_dir=None):
"""
Load a gradient machine from config file name/path.
:param config_filename: The trainer config file name/path
    :param model_dir: The model parameter directory. Defaults to the
                      directory of config_filename when None.
    :return: GradientMachine with some enhanced methods.
:rtype: paddle.GradientMachine
"""
trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile(
config_filename)
assert isinstance(trainer_config, swig_paddle.TrainerConfig)
model_conf = trainer_config.getModelConfig()
network = swig_paddle.GradientMachine.createByModelConfig(model_conf)
assert isinstance(network, swig_paddle.GradientMachine)
if model_dir is None:
model_dir = os.path.dirname(config_filename)
network.loadParameters(model_dir)
return network
def loadParameterFile(fn):
"""
    Load a Paddle parameter file into a numpy.ndarray.
:param fn: file name or file like object.
:type fn: str or file like object.
:return: numpy array
:rtype: numpy.ndarray
    :raise: paddle.UnsupportError when the parameter format is wrong.
"""
if isinstance(fn, str):
with open(fn, 'rb') as f:
return loadParameterFile(f)
elif hasattr(fn, 'read'): # File like object
version, = struct.unpack('i', fn.read(4))
if version != 0:
raise swig_paddle.UnsupportError()
value_length, = struct.unpack("I", fn.read(4))
if value_length != 4 and value_length != 8:
raise swig_paddle.UnsupportError()
dtype = 'float32' if value_length == 4 else 'float64'
param_size, = struct.unpack("L", fn.read(8))
value = numpy.fromfile(fn, dtype)
if len(value) != param_size:
raise swig_paddle.UnsupportError()
return value
else:
raise swig_paddle.UnsupportError()
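
For reference, the on-disk layout parsed above is: an int32 version (must be 0), a uint32 value width (4 or 8 bytes), an 8-byte value count, then the raw values. A hedged round-trip sketch, assuming a 64-bit platform where the native `struct` format `'L'` packs to the 8 bytes that `fn.read(8)` consumes (the file name is hypothetical):

```python
# Hedged sketch: write a file in the layout loadParameterFile parses.
import struct

import numpy

values = numpy.arange(8, dtype='float32')
with open('demo.param', 'wb') as f:
    f.write(struct.pack('i', 0))            # int32 version, must be 0
    f.write(struct.pack('I', 4))            # bytes per value: 4 or 8
    f.write(struct.pack('L', len(values)))  # value count, 8 bytes on 64-bit
    values.tofile(f)

loaded = loadParameterFile('demo.param')    # numpy.ndarray of 8 float32 values
```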
class DataProviderWrapperConverter(object):
"""
    A class that converts data from the PyDataProviderWrapper format to
    py_paddle.paddle.Arguments.
"""
class DenseValueConverter(object):
"""
Internal class
"""
def __init__(self, header_def):
self.__dim__ = header_def.dim
self.buf = []
def append(self, other):
assert len(other) == self.__dim__
self.buf += other
def __call__(self, slot_idx, arg):
mat = swig_paddle.Matrix.createDense(self.buf,
len(self.buf) / self.__dim__,
self.__dim__)
arg.setSlotValue(slot_idx, mat)
class IdValueConverter(object):
"""
Internal class
"""
def __init__(self, *args):
self.buf = []
def append(self, other):
assert isinstance(other, int)
self.buf.append(other)
def __call__(self, slot_idx, arg):
arg.setSlotIds(slot_idx, swig_paddle.IVector.create(self.buf))
class SparseNonValueConverter(object):
"""
Internal class
"""
def __init__(self, slot_def):
self.indices = [0]
self.cols = []
self.dim = slot_def.dim
def append(self, other):
self.indices.append(self.indices[-1] + len(other))
self.cols += other
def __call__(self, slot_idx, arg):
mat = swig_paddle.Matrix.createSparse(
len(self.indices) - 1, self.dim, len(self.cols), True)
assert isinstance(mat, swig_paddle.Matrix)
mat.sparseCopyFrom(self.indices, self.cols)
self.putIntoArg(slot_idx, arg, mat)
def putIntoArg(self, slot_idx, arg, mat):
arg.setSlotValue(slot_idx, mat)
class SparseValueConverter(SparseNonValueConverter):
"""
Internal class
"""
def __init__(self, slot_def):
super(DataProviderWrapperConverter.SparseValueConverter,
self).__init__(slot_def)
self.values = []
def append(self, other):
super(DataProviderWrapperConverter.SparseValueConverter,
self).append(map(lambda x: x[0], other))
self.values += map(lambda x: x[1], other)
def __call__(self, slot_idx, arg):
mat = swig_paddle.Matrix.createSparse(
len(self.indices) - 1, self.dim, len(self.cols), False)
assert isinstance(mat, swig_paddle.Matrix)
mat.sparseCopyFrom(self.indices, self.cols, self.values)
self.putIntoArg(slot_idx, arg, mat)
__SLOT_VALUE_CONVERTER_MAP__ = {
paddle.trainer.PyDataProviderWrapper.DenseSlot: DenseValueConverter,
paddle.trainer.PyDataProviderWrapper.IndexSlot: IdValueConverter,
paddle.trainer.PyDataProviderWrapper.SparseNonValueSlot:
SparseNonValueConverter,
paddle.trainer.PyDataProviderWrapper.SparseValueSlot:
SparseValueConverter
}
def __init__(self, use_seq, header):
"""
        Constructor.
        :param use_seq: True if sequences are used.
:param header: List of slots type,
trainer.PyDataProviderWrapper.SlotType
"""
self.__use_seq__ = use_seq
self.__header__ = header
def convert(self, wrapper_data, argument=None):
"""
        Convert PyDataProviderWrapper format to paddle.Arguments.
        :param wrapper_data: The data list yielded by PyDataProviderWrapper.
        :param argument: The output paddle.Arguments. If not None, the data is
                         filled into this object; otherwise a new Arguments
                         object is created.
        :return: the arguments containing the data.
:rtype: paddle.Arguments
"""
if argument is None:
argument = swig_paddle.Arguments.createArguments(0)
assert isinstance(argument, swig_paddle.Arguments)
argument.resize(len(self.__header__))
values = map(
lambda x: DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[x.__class__](x),
self.__header__)
if self.__use_seq__:
seq_dim = [[] for _ in xrange(self.__header__.__len__())]
seq_start_pos = [[0] for _ in xrange(self.__header__.__len__())]
for each_sample in wrapper_data:
for slot_idx, sequence in enumerate(each_sample):
for raw_data in sequence:
values[slot_idx].append(raw_data)
seq_start_pos[slot_idx].append(seq_start_pos[slot_idx][-1] +
len(sequence))
seq_dim[slot_idx].append(len(sequence))
for slot_idx in xrange(len(self.__header__)):
argument.setSlotSequenceDim(
slot_idx, swig_paddle.IVector.create(seq_dim[slot_idx]))
argument.setSlotSequenceStartPositions(
slot_idx,
swig_paddle.IVector.create(seq_start_pos[slot_idx]))
else:
for each_sample in wrapper_data:
for raw_data, value in zip(each_sample, values):
value.append(raw_data)
for i, v in enumerate(values):
v(i, argument)
return argument
def __call__(self, wrapper_data, argument=None):
"""
Invoke self.convert. See documents in self.convert.
"""
return self.convert(wrapper_data, argument)
def __monkey_patch_protobuf_objects__():
def ParameterConfig_toProto(self):
"""
Convert paddle.ParameterConfig to
proto.ParameterConfig_pb2.ParameterConfig
:return: proto.ParameterConfig_pb2.ParameterConfig object.
"""
param_conf = paddle.proto.ParameterConfig_pb2.ParameterConfig()
param_conf.ParseFromString(self.toProtoString())
return param_conf
swig_paddle.ParameterConfig.toProto = ParameterConfig_toProto
def OptimizationConfig_toProto(self):
"""
Convert paddle.OptimizationConfig to
proto.TrainerConfig_pb2.OptimizationConfig
:return: proto.TrainerConfig_pb2.OptimizationConfig
"""
        opt_conf = paddle.proto.TrainerConfig_pb2.OptimizationConfig()
opt_conf.ParseFromString(self.toProtoString())
return opt_conf
swig_paddle.OptimizationConfig.toProto = OptimizationConfig_toProto
def OptimizationConfig_createFromProto(protoObj):
"""
Create a new paddle.OptimizationConfig from
proto.TrainerConfig_pb2.OptimizationConfig
:param protoObj: proto.TrainerConfig_pb2.OptimizationConfig
:return: paddle.OptimizationConfig
"""
assert isinstance(protoObj, paddle.proto.OptimizationConfig)
return swig_paddle.OptimizationConfig.createFromProtoString(
protoObj.SerializeToString())
swig_paddle.OptimizationConfig.createFromProto = staticmethod(
OptimizationConfig_createFromProto)
def TrainerConfig_createFromProto(protoObj):
"""
Create a new paddle.TrainerConfig from
        proto.TrainerConfig_pb2.TrainerConfig
:param protoObj: proto.TrainerConfig
:return: paddle.TrainerConfig
"""
assert isinstance(protoObj, paddle.proto.TrainerConfig)
return swig_paddle.TrainerConfig.createFromProtoString(
protoObj.SerializeToString())
swig_paddle.TrainerConfig.createFromProto = staticmethod(
TrainerConfig_createFromProto)
def __monkey_patch_parameter__():
def getBufs(self):
"""
        Get all parameter vectors.
        NOTE: the return value is a generator; you may need to convert it to
        a list or tuple.
:return: generator of all parameter vectors.
:rtype: generator
"""
return (self.getBuf(i) for i in xrange(swig_paddle.NUM_PARAMETER_TYPES))
swig_paddle.Parameter.getBufs = getBufs
def __monkey_patch_trainer__():
swig_paddle.Trainer.__create__ = staticmethod(swig_paddle.Trainer.create)
def Trainer_create(config, model=None):
"""
        Create a trainer for a model with TrainerConfig `config`.
        config.model_config will be ignored when model is supplied.
        Trainer.trainOneBatch() and Trainer.forwardOneBatch() can be used only
        when config.data_config is set.
A typical usage for Trainer is:
.. code-block:: python
trainer = Trainer.create(trainer_config, model)
            for p in xrange(num_passes):
while True:
data = get_next_batch(batch_size)
if not data:
break
trainer.trainOneDataBatch(batch_size, data)
trainer.finishTrainPass()
trainer.finishTrain()
The trainer will take care of logging, model saving, distributed
training, etc.
:param config: trainer configuration
:type config: paddle.proto.TrainerConfig
:param model: the model to be trained
:type model: swig_paddle.GradientMachine
:return: a trainer
        :rtype: swig_paddle.Trainer
"""
assert isinstance(config, paddle.proto.TrainerConfig)
if model is not None:
assert isinstance(model, swig_paddle.GradientMachine)
return swig_paddle.Trainer.__create__(
swig_paddle.TrainerConfig.createFromProto(config), model)
swig_paddle.Trainer.create = staticmethod(Trainer_create)
swig_paddle.Trainer.__getForwardOutput__ = \
swig_paddle.Trainer.getForwardOutput
def getForwardOutput(self):
"""
        Get the network outputs from the previous trainOneBatch(),
        trainOneDataBatch(), testOneDataBatch(), or forwardOneBatch() call.
        :return: list of dictionaries with keys ['id', 'value', 'prob'], where
                 each value is a numpy.ndarray.
"""
outArgs = self.__getForwardOutput__()
return [
__arguments_to_numpy__(i, outArgs)
for i in xrange(outArgs.getSlotNum())
]
swig_paddle.Trainer.getForwardOutput = getForwardOutput
def monkeypatches():
patches = [
__monkeypatch_init_paddle__, __monkeypatch_gradient_machine__,
__monkey_patch_protobuf_objects__, __monkey_patch_parameter__,
__monkey_patch_trainer__
]
for patch in patches:
patch()
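
Putting the monkey patches together, a hedged end-to-end inference sketch with the removed helpers; the config path and the 3-d input are hypothetical placeholders:

```python
# Hedged sketch: inference with the removed py_paddle helpers.
from paddle.trainer.PyDataProvider2 import dense_vector
from py_paddle import swig_paddle, DataProviderConverter

swig_paddle.initPaddle("--use_gpu=0")
network = swig_paddle.GradientMachine.loadFromConfigFile("trainer_config.py")

converter = DataProviderConverter([dense_vector(3)])
in_args = converter([([0.1, 0.2, 0.3], )])
# forwardTest returns one {'id', 'value', 'prob'} dict per output slot.
outputs = network.forwardTest(in_args)
```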
......@@ -68,7 +68,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
| `WITH_TESTING` | OFF | Build unit tests binaries. |
| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
| `WITH_STYLE_CHECK` | ON | Check the code style when building. |
| `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu |
......
......@@ -180,7 +180,6 @@ function cmake_gen() {
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
-DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON}
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
......@@ -214,7 +213,6 @@ EOF
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \
......
......@@ -39,7 +39,6 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE})
IF(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
......@@ -48,7 +47,6 @@ ELSE(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND touch stub.cc
COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
......