import numpy as np

from ..errors import KaffeError, print_stderr
from ..graph import GraphBuilder, NodeMapper
from ..layers import NodeKind
from ..transformers import (DataInjector, DataReshaper, NodeRenamer,
                            SubNodeFuser, ReLUFuser, BatchNormScaleBiasFuser,
                            BatchNormPreprocessor, ParameterNamer, CropFuser)
from . import network


class PaddleNode(object):
    '''An intermediate representation for Paddle operations.'''

    def __init__(self, op, *args, **kwargs):
        # A string corresponding to the Paddle operation
        self.op = op
        # Positional arguments for the operation
        self.args = args
        # Keyword arguments for the operation
        self.kwargs = list(kwargs.items())
        # The source Caffe node
        self.node = None

    def format(self, arg):
        '''Returns a string representation for the given value.'''
        return "'%s'" % arg if isinstance(arg, basestring) else str(arg)

    def pair(self, key, value):
        '''Returns key=formatted(value).'''
        return '%s=%s' % (key, self.format(value))

    def emit(self):
        '''Emits the Python source for this node.'''
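        # For example, PaddleNode('conv', 3, 3, 64, 1, 1, relu=False) on a
        # node named 'conv1' emits:
        #     conv(3, 3, 64, 1, 1, relu=False, name='conv1')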
        # Format positional arguments
        args = list(map(self.format, self.args))
        # Format any keyword arguments
        if self.kwargs:
            args += [self.pair(k, v) for k, v in self.kwargs]
        # Set the node name
        args.append(self.pair('name', self.node.name))
        args = ', '.join(args)
        return '%s(%s)' % (self.op, args)


class MaybeActivated(object):
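    '''Wraps PaddleNode construction, injecting ReLU-related kwargs when the
    node's fused activation differs from the caller's default.'''
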
    def __init__(self, node, default=True):
        self.inject_kwargs = {}
        if node.metadata.get('relu', False) != default:
            self.inject_kwargs['relu'] = not default

        default_slope = 0.0
        slope = node.metadata.get('relu_negative_slope', default_slope)
        if slope != default_slope:
            self.inject_kwargs['relu_negative_slope'] = slope

    def __call__(self, *args, **kwargs):
        kwargs.update(self.inject_kwargs)
        return PaddleNode(*args, **kwargs)


class PaddleMapper(NodeMapper):
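    '''Maps Caffe graph nodes to PaddleNode operations.'''
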
    def get_kernel_params(self, node):
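        '''Returns the node's kernel parameters together with a kwargs dict
        that carries the padding only when it is non-zero.'''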
        kernel_params = node.layer.kernel_parameters
        padding = [kernel_params.pad_h, kernel_params.pad_w]
        if padding[0] == 0 and padding[1] == 0:
            padding = {}
        else:
            padding = {'padding': padding}
        return (kernel_params, padding)

    def map_convolution(self, node):
        (kernel_params, kwargs) = self.get_kernel_params(node)
        c_o = node.output_shape[1]
        group = node.parameters.group
        if group != 1:
            kwargs['group'] = group
        if not node.parameters.bias_term:
            kwargs['biased'] = False

        if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
            kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)

        return MaybeActivated(node)(
            'conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
            kernel_params.stride_h, kernel_params.stride_w, **kwargs)

    def map_deconvolution(self, node):
        (kernel_params, kwargs) = self.get_kernel_params(node)
        c_o = node.output_shape[1]
        if not node.parameters.bias_term:
            kwargs['biased'] = False

        if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
            kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)

        return MaybeActivated(node)(
            'deconv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
            kernel_params.stride_h, kernel_params.stride_w, **kwargs)

    def map_relu(self, node):
        return PaddleNode('relu')

    def map_prelu(self, node):
        channel_shared = getattr(node.parameters, 'channel_shared', False)
        return PaddleNode('prelu', channel_shared)

    def map_tanh(self, node):
        return PaddleNode('tanh')

    def map_pooling(self, node):
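        # Caffe's PoolingParameter.PoolMethod enum: MAX = 0, AVE = 1,
        # STOCHASTIC = 2 (unsupported below).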
        pool_type = node.parameters.pool
        if pool_type == 0:
            pool_op = 'max_pool'
        elif pool_type == 1:
            pool_op = 'avg_pool'
        else:
            # Stochastic pooling, for instance.
            raise KaffeError('Unsupported pooling type.')

        ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
        global_pool = getattr(node.layer.parameters, 'global_pooling', False)
        if global_pool:
            input_shape = node.get_only_parent().output_shape
            return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
                              1, ceil_mode)
        else:
            (kernel_params, padding) = self.get_kernel_params(node)
            return PaddleNode(pool_op, kernel_params.kernel_h,
                              kernel_params.kernel_w, kernel_params.stride_h,
                              kernel_params.stride_w, ceil_mode, **padding)

    def map_sigmoid(self, node):
        return PaddleNode('sigmoid')

    def map_custom(self, node):
        from .. import custom_layers
        return custom_layers.make_node(PaddleNode, node.kind, node)

    def map_inner_product(self, node):
        # TODO: support axis values other than 1
        assert node.parameters.axis == 1
        # TODO: support inner products without a bias term
        assert node.parameters.bias_term
        return MaybeActivated(node)('fc', node.parameters.num_output)

    def map_softmax(self, node):
        return PaddleNode('softmax')

    def map_lrn(self, node):
        params = node.parameters
        # The window size must be an odd value. For a window
        # size of (2*n+1), Paddle defines depth_radius = n.
        assert params.local_size % 2 == 1
        # Caffe scales by (alpha/(2*n+1)), whereas Paddle
        # just scales by alpha (as does Krizhevsky's paper).
        # We'll account for that here.
        alpha = params.alpha / float(params.local_size)
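        # e.g. AlexNet's LRN (local_size=5, alpha=1e-4) becomes alpha=2e-5 here.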
        return PaddleNode('lrn', params.local_size, alpha, params.beta)

    def map_concat(self, node):
        return PaddleNode('concat', node.parameters.axis)

    def map_dropout(self, node):
        return PaddleNode('dropout', node.parameters.dropout_ratio)

    def map_batch_norm(self, node):
        scale_offset = len(node.data) == 4
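        # Four data blobs (mean, variance, gamma, beta) indicate that a Scale
        # layer was fused in, providing the scale and offset terms.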

        # This default value comes from Caffe's batch_norm_param.
        default_eps = 1e-5
        kwargs = {'scale_offset': scale_offset}
        if node.parameters.eps != default_eps:
            kwargs['eps'] = node.parameters.eps

        return MaybeActivated(
            node, default=False)('batch_normalization', **kwargs)

    def map_eltwise(self, node):
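        # Caffe's EltwiseParameter.EltwiseOp enum: PROD = 0, SUM = 1, MAX = 2.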
        operations = {0: 'multiply', 1: 'add', 2: 'max'}
        op_code = node.parameters.operation
        try:
            return PaddleNode(operations[op_code])
        except KeyError:
            raise KaffeError('Unknown elementwise operation: {}'.format(
                op_code))

    def map_scale(self, node):
        params = node.parameters
        return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)

    def commit(self, chains):
        return chains


class PaddleEmitter(object):
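    '''Emits the Python source for a mapped network.'''
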
    def __init__(self, tab=None):
        self.tab = tab or ' ' * 4
        self.prefix = ''
        self.net_name = ''

    def indent(self):
        self.prefix += self.tab

    def outdent(self):
        self.prefix = self.prefix[:-len(self.tab)]

    def statement(self, s):
        return self.prefix + s + '\n'

    def emit_imports(self):
        import inspect
        codes = []
        codes.append(
            '### generated by caffe2fluid, your net is in class "%s" ###\n' %
            (self.net_name))
        network_source = inspect.getsource(network)
        codes.append(network_source + '\n')
        return self.statement('\n'.join(codes))

    def emit_setup_def(self):
        return self.statement('def setup(self):')

    def get_inputs_info(self, input_nodes):
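        # Builds a dict literal of input shapes with the batch dimension
        # dropped, e.g. an input 'data' with output_shape (1, 3, 224, 224)
        # yields '{"data": [3, 224, 224]}'.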
        input_shapes = {}
        for n in input_nodes:
            name = n.name
            output_shape = n.output_shape
            shape = [str(s) for s in output_shape[1:]]
            input_shapes[name] = ', '.join(shape)
        input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
        shape_str = ','.join(input_shapes)
        return '{%s}' % (shape_str)

    def emit_main_def(self, name):
        if name is None:
            return ''

        self.prefix = ''
        main_def = self.statement('if __name__ == "__main__":')
        self.indent()
        main_def += self.statement('exit(main())')
        return '\n\n' + main_def

    def emit_parents(self, chain):
        assert len(chain)
        s = 'self.feed('
        sep = ', \n' + self.prefix + (' ' * len(s))
        s += sep.join(
            ["'%s'" % parent.name for parent in chain[0].node.parents])
        return self.statement(s + ')')

    def emit_node(self, node):
        return self.statement('self.' + node.emit())

    def emit(self, name, chains, input_nodes=None):
        from ..net_template import generate_net_code, generate_main_code

        self.net_name = name
        inputs_info = self.get_inputs_info(input_nodes)

        s = self.emit_imports()
        s += generate_net_code(name, inputs_info) + '\n'
        self.indent()

        # Define the net using the API.
        s += self.emit_setup_def()
        self.indent()
        blocks = []
        for chain in chains:
            b = ''
            b += self.emit_parents(chain)
            for node in chain:
                b += self.emit_node(node)
            blocks.append(b[:-1])
        s = s + '\n\n'.join(blocks)

        # Define the main function.
        s += '\n\n\n' + generate_main_code(name)
        s += self.emit_main_def(name)
        return s


class Transformer(object):
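    '''Drives the conversion pipeline: builds the graph from the Caffe
    definition, applies the graph transformers, and produces the Paddle
    parameters and generated source.'''
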
    def __init__(self, def_path, data_path, verbose=True, phase='test'):
        self.verbose = verbose
        self.phase = phase
        self.load(def_path, data_path, phase)
        self.params = None
        self.source = None

    def load(self, def_path, data_path, phase):
        # Build the graph
        graph = GraphBuilder(def_path, phase).build()

        if data_path is not None:
            # Load and associate learned parameters
            graph = DataInjector(def_path, data_path)(graph)

        # Transform the graph
        transformers = [
            # Fuse split batch normalization layers
            BatchNormScaleBiasFuser(),

            # Fuse ReLUs
            # TODO: Move non-linearity application to layer wrapper, allowing
            # any arbitrary operation to be optionally activated.
            ReLUFuser(allowed_parent_types=[
                NodeKind.Convolution, NodeKind.InnerProduct, NodeKind.BatchNorm
            ]),

            # Rename nodes
            # Slashes are used for scoping in Paddle. Replace slashes
            # in node names with underscores.
            # (Caffe's GoogLeNet implementation uses slashes)
            NodeRenamer(lambda node: node.name.replace('/', '_')),

            # Fuse Crop
            # Crop produces a fixed-shape output Blob from an input Blob of
            # arbitrary size. When one of the input Blobs is "input" or
            # "DummyData", we can remove that input Blob and store its shape
            # in the layer instead.
            CropFuser()
        ]

        self.graph = graph.transformed(transformers)

        # Record the name mapping introduced by fused nodes.
        trace = SubNodeFuser.traced_names()
        chg2real = {}
        deleted = {}
        for k, v in trace.items():
            chg2real[k] = v[-1]  # mapping from changed name to real name
            for n in v:
                if n in chg2real:
                    continue
                if n not in deleted:
                    deleted[n] = '%s.%s' % (k, v[-1])

        self.graph.add_name_trace({
            'chg2real': chg2real,
            'deleted': deleted
        }, 'paddle')

        # Display the graph
        if self.verbose:
            print_stderr(self.graph)

    def transform_data(self):
        if self.params is None:
            transformers = [
                # Reshape the parameters to Paddle's ordering
                DataReshaper({
                    # (c_o, c_i) -> (c_i, c_o)
                    NodeKind.InnerProduct: (1, 0)
                }),

                # Pre-process batch normalization data
                BatchNormPreprocessor(),

                # Convert parameters to dictionaries
                ParameterNamer(),
            ]
            self.graph = self.graph.transformed(transformers)
            self.params = {
                node.name: node.data
                for node in self.graph.nodes if node.data
            }
            self.params['caffe2fluid_name_trace'] = self.graph.get_name_trace()

        return self.params

    def transform_source(self):
        if self.source is None:
            mapper = PaddleMapper(self.graph)
            chains = mapper.map()
            emitter = PaddleEmitter()
            input_nodes = self.graph.get_input_nodes()
            self.source = emitter.emit(self.graph.name, chains, input_nodes)
        return self.source
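

# A minimal usage sketch (the paths below are illustrative):
#
#   transformer = Transformer('net.prototxt', 'net.caffemodel', phase='test')
#   src = transformer.transform_source()   # generated Paddle network source
#   params = transformer.transform_data()  # dict of numpy parameter blobs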