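'''Transforms a Caffe model definition into Paddle (fluid) source code and a
set of named parameters, via an intermediate graph representation.'''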
import numpy as np
from past.builtins import basestring
from ..errors import KaffeError, print_stderr
from ..graph import GraphBuilder, NodeMapper
from ..layers import NodeKind
from ..transformers import (DataInjector, DataReshaper, NodeRenamer,
                            SubNodeFuser, ReLUFuser, BatchNormScaleBiasFuser,
                            BatchNormPreprocessor, ParameterNamer, CropFuser)
from . import network


class PaddleNode(object):
    '''An intermediate representation for Paddle operations.'''

    def __init__(self, op, *args, **kwargs):
        # A string corresponding to the Paddle operation
        self.op = op
        # Positional arguments for the operation
        self.args = args
        # Keyword arguments for the operation
        self.kwargs = list(kwargs.items())
        # The source Caffe node
        self.node = None

    def format(self, arg):
        '''Returns a string representation for the given value.'''
        return "'%s'" % arg if isinstance(arg, basestring) else str(arg)

    def pair(self, key, value):
        '''Returns key=formatted(value).'''
        return '%s=%s' % (key, self.format(value))

    def emit(self):
        '''Emits the Python source for this node.'''
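        # For example (illustrative), PaddleNode('conv', 3, 3, 64, 1, 1,
        # relu=False) with a source node named 'conv1' emits:
        #     conv(3, 3, 64, 1, 1, relu=False, name='conv1')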
        # Format positional arguments
        args = list(map(self.format, self.args))
        # Format any keyword arguments
        if self.kwargs:
            args += [self.pair(k, v) for k, v in self.kwargs]
        # Set the node name
        args.append(self.pair('name', self.node.name))
        args = ', '.join(args)
        return '%s(%s)' % (self.op, args)


class MaybeActivated(object):
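    '''Builds a PaddleNode, injecting activation keyword arguments when the
    node's fused-ReLU metadata differs from the given default.'''
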
    def __init__(self, node, default=True):
        self.inject_kwargs = {}
        if node.metadata.get('relu', False) != default:
            self.inject_kwargs['relu'] = not default

        default_slope = 0.0
        slope = node.metadata.get('relu_negative_slope', default_slope)
        if slope != default_slope:
            self.inject_kwargs['relu_negative_slope'] = slope

    def __call__(self, *args, **kwargs):
        kwargs.update(self.inject_kwargs)
        return PaddleNode(*args, **kwargs)


class PaddleMapper(NodeMapper):
    def get_kernel_params(self, node):
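        '''Returns the layer's kernel parameters together with a kwargs dict
        that carries the padding, or an empty dict when padding is zero.'''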
        kernel_params = node.layer.kernel_parameters
        padding = [kernel_params.pad_h, kernel_params.pad_w]
        if padding[0] == 0 and padding[1] == 0:
            padding = {}
        else:
            padding = {'padding': padding}
        return (kernel_params, padding)

    def map_convolution(self, node):
        (kernel_params, kwargs) = self.get_kernel_params(node)
        c_o = node.output_shape[1]
        group = node.parameters.group
        if group != 1:
            kwargs['group'] = group
        if not node.parameters.bias_term:
            kwargs['biased'] = False

        if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
            kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)

        return MaybeActivated(node)(
            'conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
            kernel_params.stride_h, kernel_params.stride_w, **kwargs)

    def map_deconvolution(self, node):
        (kernel_params, kwargs) = self.get_kernel_params(node)
        c_o = node.output_shape[1]
        if not node.parameters.bias_term:
            kwargs['biased'] = False

        if kernel_params.dila_h != 1 or kernel_params.dila_w != 1:
            kwargs['dilation'] = (kernel_params.dila_h, kernel_params.dila_w)

        return MaybeActivated(node)(
            'deconv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
            kernel_params.stride_h, kernel_params.stride_w, **kwargs)

    def map_relu(self, node):
        return PaddleNode('relu')

    def map_prelu(self, node):
        channel_shared = getattr(node.parameters, 'channel_shared', False)
        return PaddleNode('prelu', channel_shared)

    def map_tanh(self, node):
        return PaddleNode('tanh')

    def map_pooling(self, node):
        pool_type = node.parameters.pool
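        # Caffe's PoolingParameter method codes: MAX = 0, AVE = 1,
        # STOCHASTIC = 2.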
        if pool_type == 0:
            pool_op = 'max_pool'
        elif pool_type == 1:
            pool_op = 'avg_pool'
        else:
            # Stochastic pooling is not handled by this converter.
            raise KaffeError('Unsupported pooling type: {}'.format(pool_type))

        ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
        global_pool = getattr(node.layer.parameters, 'global_pooling', False)
        if global_pool:
            input_shape = node.get_only_parent().output_shape
            return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
                              1, ceil_mode)
        else:
            (kernel_params, padding) = self.get_kernel_params(node)
            return PaddleNode(pool_op, kernel_params.kernel_h,
                              kernel_params.kernel_w, kernel_params.stride_h,
                              kernel_params.stride_w, ceil_mode, **padding)

    def map_sigmoid(self, node):
        return PaddleNode('sigmoid')

    def map_custom(self, node):
        from .. import custom_layers
        return custom_layers.make_node(PaddleNode, node.kind, node)

    def map_inner_product(self, node):
        # TODO: support axis values other than 1
        assert node.parameters.axis == 1
        # TODO: support layers without a bias term
        assert node.parameters.bias_term
        return MaybeActivated(node)('fc', node.parameters.num_output)

    def map_softmax(self, node):
        return PaddleNode('softmax', node.parameters.axis)

    def map_lrn(self, node):
        params = node.parameters
        # The window size must be an odd value.
        assert params.local_size % 2 == 1
        # Caffe scales by (alpha / local_size), whereas Paddle scales by
        # alpha alone (as does Krizhevsky's paper), so divide alpha by the
        # window size here.
        alpha = params.alpha / float(params.local_size)
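        # e.g. local_size=5 and alpha=1e-4 in Caffe become alpha=2e-5 here.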
        return PaddleNode('lrn', params.local_size, alpha, params.beta)

    def map_concat(self, node):
        return PaddleNode('concat', node.parameters.axis)

    def map_dropout(self, node):
        return PaddleNode('dropout', node.parameters.dropout_ratio)

    def map_batch_norm(self, node):
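        # Four data blobs indicate that a fused Scale layer contributed a
        # scale and an offset in addition to the mean and variance.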
        scale_offset = len(node.data) == 4

        # The default eps value in Caffe's BatchNormParameter.
        default_eps = 1e-5
        kwargs = {'scale_offset': scale_offset}
        if node.parameters.eps != default_eps:
            kwargs['eps'] = node.parameters.eps

        return MaybeActivated(
            node, default=False)('batch_normalization', **kwargs)

    def map_eltwise(self, node):
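        # Caffe's EltwiseParameter op codes: PROD = 0, SUM = 1, MAX = 2.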
        operations = {0: 'multiply', 1: 'add', 2: 'max'}
        op_code = node.parameters.operation
        try:
            return PaddleNode(operations[op_code])
        except KeyError:
            raise KaffeError('Unknown elementwise operation: {}'.format(
                op_code))

    def map_scale(self, node):
        params = node.parameters
        return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)

    def commit(self, chains):
        return chains


class PaddleEmitter(object):
    def __init__(self, tab=None):
        self.tab = tab or ' ' * 4
        self.prefix = ''
        self.net_name = ''

    def indent(self):
        self.prefix += self.tab

    def outdent(self):
        self.prefix = self.prefix[:-len(self.tab)]

    def statement(self, s):
        return self.prefix + s + '\n'

    def emit_imports(self):
        import inspect
        codes = []
        codes.append(
            '### generated by caffe2fluid; the net is defined in class "%s" ###\n'
            % (self.net_name))
        network_source = inspect.getsource(network)
        codes.append(network_source + '\n')
        return self.statement('\n'.join(codes))

    def emit_setup_def(self):
        return self.statement('def setup(self):')

    def get_inputs_info(self, input_nodes):
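        '''Returns a dict literal mapping each input name to its per-sample
        shape (with the batch dimension dropped), e.g. '{"data": [3, 224, 224]}'
        for an input blob of shape (1, 3, 224, 224).'''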
        input_shapes = {}
        for n in input_nodes:
            name = n.name
            output_shape = n.output_shape
            shape = [str(s) for s in output_shape[1:]]
            input_shapes[name] = ', '.join(shape)
        input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
        shape_str = ','.join(input_shapes)
        return '{%s}' % (shape_str)

    def emit_main_def(self, name):
        if name is None:
            return ''

        self.prefix = ''
        main_def = self.statement('if __name__ == "__main__":')
        self.indent()
        main_def += self.statement('exit(main())')
        return '\n\n' + main_def

    def emit_parents(self, chain):
        assert len(chain)
        s = 'self.feed('
        sep = ',\n' + self.prefix + (' ' * len(s))
        s += sep.join(
            ["'%s'" % parent.name for parent in chain[0].node.parents])
        return self.statement(s + ')')

    def emit_node(self, node):
        return self.statement('self.' + node.emit())

    def emit(self, name, chains, input_nodes=None):
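        '''Emits the full generated module: the network helpers, a net class
        whose setup() body is one block of self.feed(...)/self.<op>(...)
        calls per chain, and a main entry point.'''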
        from ..net_template import generate_net_code
        from ..net_template import generate_main_code

        self.net_name = name
        inputs_info = self.get_inputs_info(input_nodes)

        s = self.emit_imports()
        s += generate_net_code(name, inputs_info) + '\n'
        self.indent()

        # define the net using api
        s += self.emit_setup_def()
        self.indent()
        blocks = []
        for chain in chains:
            b = ''
            b += self.emit_parents(chain)
            for node in chain:
                b += self.emit_node(node)
            blocks.append(b[:-1])
        s = s + '\n\n'.join(blocks)

        # define the main function
        s += '\n\n\n' + generate_main_code(name)
        s += self.emit_main_def(name)
        return s


class Transformer(object):
    def __init__(self, def_path, data_path, verbose=True, phase='test'):
        self.verbose = verbose
        self.phase = phase
        self.params = None
        self.source = None
        self.load(def_path, data_path, phase)

    def load(self, def_path, data_path, phase):
        # Build the graph
        graph = GraphBuilder(def_path, phase).build()

        if data_path is not None:
            # Load and associate learned parameters
            graph = DataInjector(def_path, data_path)(graph)

        # Transform the graph
        transformers = [
            # Fuse split batch normalization layers
            BatchNormScaleBiasFuser(),

            # Fuse ReLUs
            # TODO: Move non-linearity application to layer wrapper, allowing
            # any arbitrary operation to be optionally activated.
            ReLUFuser(allowed_parent_types=[
                NodeKind.Convolution, NodeKind.InnerProduct, NodeKind.BatchNorm
            ]),

            # Rename nodes
            # Slashes are used for scoping in Paddle. Replace slashes
            # in node names with underscores.
            # (Caffe's GoogLeNet implementation uses slashes)
            NodeRenamer(lambda node: node.name.replace('/', '_')),

            # Fuse Crop layers
            # Crop produces a fixed-size output blob from an input blob of
            # arbitrary size. When one of the input blobs is "input" or
            # "DummyData", we can remove that input blob and fold its shape
            # into the fused layer.
            CropFuser()
        ]

        self.graph = graph.transformed(transformers)

        # Record the name mapping introduced by fused nodes.
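        # Illustratively, a trace entry {'fused': ['a', 'b']} yields
        # chg2real {'fused': 'b'} and deleted {'a': 'fused.b', 'b': 'fused.b'}.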
        trace = SubNodeFuser.traced_names()
        chg2real = {}
        deleted = {}
        for k, v in trace.items():
            chg2real[k] = v[-1]  # map the fused (changed) name to the real name
            for n in v:
                if n in chg2real:
                    continue
                if n not in deleted:
                    deleted[n] = '%s.%s' % (k, v[-1])

        self.graph.add_name_trace({
            'chg2real': chg2real,
            'deleted': deleted
        }, 'paddle')

        # Display the graph
        if self.verbose:
            print_stderr(self.graph)

    def transform_data(self):
        if self.params is None:
            transformers = [
                # Reshape the parameters to Paddle's ordering
                DataReshaper({
                    # (c_o, c_i) -> (c_i, c_o)
                    NodeKind.InnerProduct: (1, 0)
                }),

                # Pre-process batch normalization data
                BatchNormPreprocessor(),

                # Convert parameters to dictionaries
                ParameterNamer(),
            ]
            self.graph = self.graph.transformed(transformers)
            self.params = {
                node.name: node.data
                for node in self.graph.nodes if node.data
            }
            self.params['caffe2fluid_name_trace'] = self.graph.get_name_trace()

        return self.params

    def transform_source(self):
        if self.source is None:
            mapper = PaddleMapper(self.graph)
            chains = mapper.map()
            emitter = PaddleEmitter()
            input_nodes = self.graph.get_input_nodes()
            self.source = emitter.emit(self.graph.name, chains, input_nodes)
        return self.source
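

# A minimal usage sketch (hypothetical file names; in caffe2fluid the
# Transformer is normally driven by the convert tool):
#
#   transformer = Transformer('alexnet.prototxt', 'alexnet.caffemodel',
#                             phase='test')
#   source = transformer.transform_source()  # generated Paddle source code
#   params = transformer.transform_data()    # dict of layer parameters
#   with open('alexnet_gen.py', 'w') as f:
#       f.write(source)
#   np.save('alexnet_gen.npy', params)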