inference.py 6.5 KB
Newer Older
D
dzhwinter 已提交
1
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
D
dzhwinter 已提交
2
#
D
dzhwinter 已提交
3 4 5
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
D
dzhwinter 已提交
6
#
D
dzhwinter 已提交
7
#     http://www.apache.org/licenses/LICENSE-2.0
D
dzhwinter 已提交
8
#
D
dzhwinter 已提交
9 10 11 12 13 14
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Y
Yu Yang 已提交
15 16
import numpy
import collections
Y
Yu Yang 已提交
17
import topology
Y
Yu Yang 已提交
18
import minibatch
19
import cPickle
Y
Yu Yang 已提交
20

Y
Yu Yang 已提交
21
# Public API of this module: the convenience function `infer` and the
# reusable `Inference` class.
__all__ = ['infer', 'Inference']
Y
Yu Yang 已提交
22 23


Y
Yu Yang 已提交
24
class Inference(object):
    """
    Inference combines neural network output and parameters together
    to do inference.

    ..  code-block:: python

        inferer = Inference(output_layer=prediction, parameters=parameters)
        for data_batch in batches:
            print inferer.infer(data_batch)

    :param output_layer: The neural network that should be inferenced.
    :type output_layer: paddle.v2.config_base.Layer or the sequence
                        of paddle.v2.config_base.Layer
    :param parameters: The parameters dictionary.
    :type parameters: paddle.v2.parameters.Parameters
    :param fileobj: A file-like object containing a cPickle dump with keys
                    'protobin' (serialized model config) and 'data_type'.
                    Used as an alternative to ``output_layer``.
    """

    def __init__(self, parameters, output_layer=None, fileobj=None):
        # Imported lazily so that merely importing this module does not
        # require the SWIG bindings to be available.
        import py_paddle.swig_paddle as api

        if output_layer is not None:
            # Build the inference topology from the given output layer(s).
            topo = topology.Topology(output_layer)
            gm = api.GradientMachine.createFromConfigProto(
                topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
            self.__data_types__ = topo.data_type()
        elif fileobj is not None:
            # Restore a previously dumped topology/config from the file.
            # NOTE(review): cPickle.load on untrusted files can execute
            # arbitrary code; only load model files from trusted sources.
            tmp = cPickle.load(fileobj)
            gm = api.GradientMachine.createByConfigProtoStr(
                tmp['protobin'], api.CREATE_MODE_TESTING,
                [api.PARAMETER_VALUE])
            self.__data_types__ = tmp['data_type']
        else:
            raise ValueError("Either output_layer or fileobj must be set")

        # Copy every parameter value from the Python-side Parameters object
        # into the C++ gradient machine.
        for param in gm.getParameters():
            val = param.getBuf(api.PARAMETER_VALUE)
            name = param.getName()
            assert isinstance(val, api.Vector)
            val.copyFromNumpyArray(parameters.get(name).flatten())
            # the setValueUpdated function is called in randomize, zeroMem,
            # load function in paddle/parameter/Parameter.cpp. But in the
            # inference mode, the setValueUpdated is never called, it will
            # cause the parameter will not be dispatched
            # in MultiGradientMachine for multi-GPU. So setValueUpdated is
            # called here, but it's better to call this function in one place.
            param.setValueUpdated()
        self.__gradient_machine__ = gm

    def iter_infer(self, input, feeding=None):
        """
        Generator yielding the raw forwardTest output for each mini-batch
        of ``input``. ``feeding`` maps data-layer names to positions in each
        input sample (optional).
        """
        from data_feeder import DataFeeder
        feeder = DataFeeder(self.__data_types__, feeding)
        batch_size = len(input)

        def __reader_impl__():
            for each_sample in input:
                yield each_sample

        reader = minibatch.batch(__reader_impl__, batch_size=batch_size)

        self.__gradient_machine__.start()
        # try/finally guarantees finish() is called even if forwardTest
        # raises or the consumer abandons the generator early; the original
        # code leaked a started gradient machine in those cases.
        try:
            for data_batch in reader():
                yield self.__gradient_machine__.forwardTest(
                    feeder(data_batch))
        finally:
            self.__gradient_machine__.finish()

    def iter_infer_field(self, field, **kwargs):
        """
        Generator yielding, for each output of each batch, the list of
        values selected by ``field`` (a single field name or a list/tuple
        of field names, e.g. 'value', 'id', 'prob').
        """
        if not isinstance(field, list) and not isinstance(field, tuple):
            field = [field]

        for result in self.iter_infer(**kwargs):
            for each_result in result:
                item = [each_result[each_field] for each_field in field]
                yield item

    def infer(self, input, field='value', flatten_result=True, **kwargs):
        """
        Infer a data by model.

        :param input: input data batch. Should be python iterable object.
        :param field: output field, or a list/tuple of output fields.
        :param flatten_result: if True, concatenate the per-batch results
                               of each output into one numpy array.
        """
        retv = None
        kwargs['input'] = input
        for result in self.iter_infer_field(field=field, **kwargs):
            if retv is None:
                # Lazily size the accumulator from the first result so the
                # number of outputs need not be known in advance.
                retv = [[] for i in xrange(len(result))]
            for i, item in enumerate(result):
                retv[i].append(item)

        # `is None`, not `== None`: identity test is the correct idiom and
        # avoids surprising __eq__ overloads.
        if retv is None:
            return []

        if flatten_result:
            retv = [numpy.concatenate(out) for out in retv]

        if len(retv) == 1:
            return retv[0]
        else:
            return retv
Y
Yu Yang 已提交
123 124


125
def infer(output_layer, parameters, input, feeding=None, field='value'):
    """
    Infer a neural network by given neural network output and parameters.  The
    user should pass either a batch of input data or reader method.

    Example usage for single output_layer:

    ..  code-block:: python

        result = paddle.infer(output_layer=prediction,
                              parameters=parameters,
                              input=SomeData)
        print result

    Example usage for multiple output_layers and fields:

    ..  code-block:: python

        result = paddle.infer(output_layer=[prediction1, prediction2],
                              parameters=parameters,
                              input=SomeData,
                              field=[id, value]])
        print result

    :param output_layer: output of the neural network that would be inferred
    :type output_layer: paddle.v2.config_base.Layer or a list of
                        paddle.v2.config_base.Layer
    :param parameters: parameters of the neural network.
    :type parameters: paddle.v2.parameters.Parameters
    :param input: input data batch. Should be a python iterable object, and each
                  element is the data batch.
    :type input: collections.Iterable
    :param feeding: Reader dictionary. Default could generate from input
                    value.
    :param field: The prediction field. It should be in [`value`, `id`, `prob`].
                  `value` and `prob` mean return the prediction probabilities,
                  `id` means return the prediction labels. Default is `value`.
                  Note that `prob` only used when output_layer is beam_search
                  or max_id.
    :type field: str
    :return: The prediction result. If there are multiple output_layers and
             fields, the return order is output_layer1.field1,
             output_layer2.field1, ..., output_layer1.field2,
             output_layer2.field2 ...
    :rtype: numpy.ndarray
    """

    # Thin convenience wrapper: build an Inference object for this one call
    # and delegate. For repeated inference, construct Inference once instead.
    inferer = Inference(output_layer=output_layer, parameters=parameters)
    return inferer.infer(field=field, input=input, feeding=feeding)