diff --git a/x2paddle/convert.py b/x2paddle/convert.py index 18accb882b5d4b185816cc0a0538951555830f11..a5aab534ec0916ce236f069bbb9e8f6d2695da80 100644 --- a/x2paddle/convert.py +++ b/x2paddle/convert.py @@ -110,14 +110,17 @@ def onnx2paddle(model_path, save_dir): except: print("onnx is not installed, use \"pip install onnx==1.5.0\".") return + print("Now translating model from onnx to paddle.") from x2paddle.decoder.onnx_decoder import ONNXDecoder - from x2paddle.op_mapper.onnx_op_mapper import ONNXOpMapper - from x2paddle.optimizer.onnx_optimizer import ONNXOptimizer - print("Now translating model from onnx to paddle.") model = ONNXDecoder(model_path) + + from x2paddle.op_mapper.onnx_op_mapper import ONNXOpMapper mapper = ONNXOpMapper(model) + + from x2paddle.optimizer.onnx_optimizer import ONNXOptimizer optimizer = ONNXOptimizer(mapper) + optimizer.delete_redundance_code() mapper.save_inference_model(save_dir) diff --git a/x2paddle/decoder/onnx_backend.py b/x2paddle/decoder/onnx_backend.py new file mode 100644 index 0000000000000000000000000000000000000000..bf06055da4b8e50469dc09136d05af6afdad57b4 --- /dev/null +++ b/x2paddle/decoder/onnx_backend.py @@ -0,0 +1,1074 @@ +## @package onnx +# Module caffe2.python.onnx.backend +"""Backend for running ONNX on Caffe2 + +To run this, you will need to have Caffe2 installed as well. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import collections +from subprocess import Popen, PIPE +import zipfile +import itertools + +# When onnx is built against a version of protobuf that is older than +# that which is vendored with caffe2, onnx will crash if caffe2's +# vendored protobuf is loaded first. We can work around this by +# importing onnx first, which will cause it to go out and pick up the +# system protobuf. +import onnx.backend + +import caffe2 +from caffe2.python import core, workspace, rnn_cell, gru_cell +from caffe2.python.compatibility import container_abcs +from caffe2.python.model_helper import ModelHelper +from caffe2.proto import caffe2_pb2 +import caffe2.python.utils +import numpy as np +import onnx +from onnx import checker, GraphProto, TensorProto, AttributeProto, ModelProto +import onnx.numpy_helper +import onnx.defs +import onnx.optimizer +import onnx.shape_inference +import onnx.utils +from onnx.backend.base import Backend, Device, DeviceType, namedtupledict + +from caffe2.python.onnx.workspace import Workspace +from caffe2.python.onnx.backend_rep import Caffe2Rep +from caffe2.python.onnx.backend_cpp_rep import Caffe2CppRep + +import caffe2.python._import_c_extension as C + +import warnings + + +def force_unicode(s): + try: + return s.decode('utf-8') + except AttributeError: + return s + + +def get_device_option(device): + m = { + DeviceType.CPU: caffe2_pb2.CPU, + DeviceType.CUDA: workspace.GpuDeviceType + } + return core.DeviceOption(m[device.type], device.device_id) + + +class OnnxAttributes(dict): + """ + This is a more convenient way to work with ONNX/Caffe2 attributes + that is not the protobuf representation. 
+ """ + @staticmethod + def from_onnx(args): + d = OnnxAttributes() + for arg in args: + d[arg.name] = convertAttributeProto(arg) + return d + + def caffe2(self, kmap=lambda k: k): + for k, v in self.items(): + if kmap(k) != '': + yield caffe2.python.utils.MakeArgument(kmap(k), v) + + +# TODO: Move this into ONNX main library +def convertAttributeProto(onnx_arg): + """ + Convert an ONNX AttributeProto into an appropriate Python object + for the type. + + NB: Tensor attribute gets returned as the straight proto. + """ + if onnx_arg.HasField('f'): + return onnx_arg.f + elif onnx_arg.HasField('i'): + return onnx_arg.i + elif onnx_arg.HasField('s'): + return onnx_arg.s + elif onnx_arg.HasField('t'): + return onnx_arg.t # this is a proto! + elif onnx_arg.HasField('g'): + return Caffe2Backend._graph_to_net(onnx_arg.g, + Caffe2Backend._known_opset_version) + elif len(onnx_arg.floats): + return list(onnx_arg.floats) + elif len(onnx_arg.ints): + return list(onnx_arg.ints) + elif len(onnx_arg.strings): + return list(onnx_arg.strings) + elif len(onnx_arg.graphs): + retval = [] + # TODO: this doesn't work with RNN ops + for g in onnx_arg.graphs: + retval.append( + Caffe2Backend._graph_to_net(g, + Caffe2Backend._known_opset_version)) + return retval + else: + raise ValueError("Unsupported ONNX attribute: {}".format(onnx_arg)) + + +# TODO: Move this into ONNX main library +class OnnxNode(object): + """ + Reimplementation of NodeProto from ONNX, but in a form + more convenient to work with from Python. + + We may temporarily edit these nodes to get them into Caffe2 form, + before actually translating into the Caffe2 protobuf, since this + is easier than decomposing everything, and putting it back together + when we're ready. + """ + def __init__(self, node): + self.name = str(node.name) + self.op_type = str(node.op_type) + self.attrs = OnnxAttributes.from_onnx(node.attribute) + self.inputs = list(node.input) + self.outputs = list(node.output) + + +Caffe2Ops = collections.namedtuple('Caffe2Ops', + ['ops', 'init_ops', 'interface_blobs']) + + +class Caffe2Backend(Backend): + + # The greatest version of the ONNX operator set which we are aware of. + # Models whose version is larger than this will cause us to emit a warning + # that we are attempting to translate on a "best effort" basis. + # + # If you increase this, make SURE you cross-reference all BC-breaking + # changes from one version to the next, and any that you did not + # implement, mark as broken in _broken_operators + _known_opset_version = 9 + + # This dictionary will record operators which are KNOWN to be + # broken, so we give a good error message rather than do something + # bogus and then fail. + _broken_operators = { + # 'BrokenOp': version_it_was_broken_in + } + + # Operators that are different between Caffe2 and + # ONNX but only in their name. + # In most cases, this should be empty - as the effort of ONNX is + # to unify the operator definitions. 
+    _renamed_operators = {
+        'GlobalMaxPool': 'MaxPool',
+        'GlobalAveragePool': 'AveragePool',
+        'Pad': 'PadImage',
+        'Neg': 'Negative',
+        'BatchNormalization': 'SpatialBN',
+        'InstanceNormalization': 'InstanceNorm',
+        'MatMul': 'BatchMatMul',
+        'Upsample': 'ResizeNearest',
+        'Identity': 'Copy',
+        'Equal': 'EQ',
+        'Less': 'LT',
+        'Greater': 'GT',
+        'Unsqueeze': 'ExpandDims',
+        'Loop': 'ONNXWhile',
+        'Tile': 'NumpyTile',
+        'RandomNormal': 'GaussianFill',
+        'RandomUniform': 'UniformFill',
+    }
+
+    _global_renamed_attrs = {'kernel_shape': 'kernels'}
+    _per_op_renamed_attrs = {
+        'Squeeze': {
+            'axes': 'dims'
+        },
+        'Unsqueeze': {
+            'axes': 'dims'
+        },
+        'Transpose': {
+            'perm': 'axes'
+        },
+        'Upsample': {
+            'mode': '',
+            'scales': ''
+        },
+        'ConvTranspose': {
+            'output_padding': 'adjs'
+        },
+        'Selu': {
+            'gamma': 'scale'
+        },
+        'If': {
+            'then_branch': 'then_net',
+            'else_branch': 'else_net'
+        },
+        'RandomUniform': {
+            'low': 'min',
+            'high': 'max'
+        }
+    }
+
+    # operators whose behavior is different beyond renaming
+    # the value is an attribute of this class that is a
+    # function from ONNX node_def to caffe2 op_def
+    _special_operators = {
+        'LSTM': '_create_rnn_variant',
+        'GRU': '_create_rnn_variant',
+        'RNN': '_create_rnn_variant',
+        'Loop': '_create_loop',
+        'If': '_create_if',
+        'Upsample': '_create_upsample',
+        'RandomNormal': '_create_gaussian_fill'
+    }
+
+    # Dummy name generator
+    _dummy_name = C.DummyName()
+
+    @classmethod
+    def dummy_name(cls):
+        return cls._dummy_name.new_dummy_name()
+
+    # NB: By default, you will use the LATEST definition of the operator,
+    # so this interface MAY make BC-breaking changes.  Specify an
+    # opset_version if you don't want this to version.
+    @classmethod
+    def run_node(cls,
+                 node,
+                 inputs,
+                 device='CPU',
+                 opset_version=_known_opset_version,
+                 outputs_info=None):
+        super(Caffe2Backend, cls).run_node(node,
+                                           inputs,
+                                           device=device,
+                                           outputs_info=outputs_info,
+                                           opset_version=opset_version)
+
+        value_infos = []
+        device_option = get_device_option(Device(device))
+        ws = Workspace()
+        with core.DeviceScope(device_option):  # temporary!
+            if isinstance(inputs, dict):
+                for key, value in inputs.items():
+                    ws.FeedBlob(key, value)
+                    value_infos.append(
+                        onnx.helper.make_tensor_value_info(
+                            name=key,
+                            elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
+                                value.dtype],
+                            shape=value.shape).SerializeToString())
+            else:
+                assert len(node.input) == len(
+                    inputs), "{}: expected {} but got {}".format(
+                        node.op_type, len(node.input), len(inputs))
+                for key, value in zip(node.input, inputs):
+                    ws.FeedBlob(key, value)
+                    value_infos.append(
+                        onnx.helper.make_tensor_value_info(
+                            name=key,
+                            elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
+                                value.dtype],
+                            shape=value.shape).SerializeToString())
+
+            ops = []
+            cbackend = C.Caffe2Backend(cls._dummy_name)
+            ops_str = cbackend.convert_node(node.SerializeToString(),
+                                            value_infos, opset_version)
+            for s in ops_str[0] + ops_str[1]:
+                op = caffe2_pb2.OperatorDef()
+                op.ParseFromString(s)
+                op.device_option.CopyFrom(device_option)
+                ops.append(op)
+            ws.RunOperatorsOnce(ops)
+            output_values = [ws.FetchBlob(name) for name in node.output]
+            return namedtupledict('Outputs', node.output)(*output_values)
+
+    @classmethod
+    def _create_tensor_filling_op(cls, onnx_tensor, name=None):
+        """
+        Given an Onnx TensorProto, translate it into a Caffe2 operator
+        which produces the given tensor filling op.
+ """ + assert name or onnx_tensor.name + name = name or onnx_tensor.name + + c2_op = caffe2_pb2.OperatorDef() + + c2_values = c2_op.arg.add() + c2_values.name = "values" + + def tensor2list(onnx_tensor): + # Use the onnx.numpy_helper because the data may be raw + return onnx.numpy_helper.to_array(onnx_tensor).flatten().tolist() + + if onnx_tensor.data_type in [TensorProto.FLOAT]: + c2_op.type = 'GivenTensorFill' + c2_values.floats.extend(tensor2list(onnx_tensor)) + elif onnx_tensor.data_type in [TensorProto.DOUBLE]: + c2_op.type = 'GivenTensorDoubleFill' + c2_values.floats.extend(tensor2list(onnx_tensor)) + elif onnx_tensor.data_type in [TensorProto.INT64, TensorProto.UINT32]: + c2_op.type = 'GivenTensorInt64Fill' + c2_values.ints.extend(tensor2list(onnx_tensor)) + elif onnx_tensor.data_type in [ + TensorProto.UINT8, TensorProto.INT8, TensorProto.UINT16, + TensorProto.INT16, TensorProto.INT32 + ]: + c2_op.type = 'GivenTensorIntFill' + c2_values.ints.extend(tensor2list(onnx_tensor)) + elif onnx_tensor.data_type == TensorProto.BOOL: + c2_op.type = 'GivenTensorBoolFill' + c2_values.ints.extend(tensor2list(onnx_tensor)) + elif onnx_tensor.data_type == TensorProto.STRING: + c2_op.type = 'GivenTensorStringFill' + c2_values.strings.extend(onnx_tensor.string_data) + else: + raise RuntimeError("unrecognized tensor type {}".format( + onnx_tensor.data_type)) + + c2_shape = c2_op.arg.add() + c2_shape.name = "shape" + c2_shape.ints.extend(onnx_tensor.dims) + + c2_op.output.append(name) + + return c2_op + + @classmethod + def _rnn_reform_weights(cls, reforms, name, hidden_size, init_net, gates, + reorder_indices): + for name_from, name_to, do_concat, extra_dims in reforms: + gate_blobs = [ + '%s/%s_%s' % (name, prefix, name_to) for prefix in gates + ] + for i, x in enumerate(gate_blobs): + dim0 = i * hidden_size, (i + 1) * hidden_size + starts, ends = zip(dim0, *extra_dims) + init_net.Slice(name_from, x, starts=starts, ends=ends) + if do_concat: + reordered_gate_blobs = [gate_blobs[i] for i in reorder_indices] + init_net.Concat(reordered_gate_blobs, + ['%s/%s' % (name, name_to), + cls.dummy_name()], + axis=0) + + @classmethod + def _make_rnn_direction(cls, input_blob, B, W, R, initial_states_and_names, + sequence_lens, pred_mh, init_net, input_size, + hidden_size, num_gates, direction_offset, Bi, Br, + W_, R_, reform, make_cell, keep_outputs): + name = cls.dummy_name() + + # input and recurrence biases are squashed together in onnx + # but not in caffe2 + gates_hidden_size = num_gates * hidden_size + bias_offset = 2 * direction_offset * gates_hidden_size + weight_offset = direction_offset * gates_hidden_size + Bi = init_net.Slice(B, + name + Bi, + starts=[bias_offset + 0 * gates_hidden_size], + ends=[bias_offset + 1 * gates_hidden_size]) + Br = init_net.Slice(B, + name + Br, + starts=[bias_offset + 1 * gates_hidden_size], + ends=[bias_offset + 2 * gates_hidden_size]) + W_ = init_net.Slice(W, + name + W_, + starts=[weight_offset + 0 * gates_hidden_size, 0], + ends=[weight_offset + 1 * gates_hidden_size, -1]) + R_ = init_net.Slice(R, + name + R_, + starts=[weight_offset + 0 * gates_hidden_size, 0], + ends=[weight_offset + 1 * gates_hidden_size, -1]) + + initial_states_sliced = [] + for initial_state, name_suffix in initial_states_and_names: + initial_states_sliced.append( + pred_mh.net.Slice(initial_state, + name + name_suffix, + starts=[direction_offset + 0, 0, 0], + ends=[direction_offset + 1, -1, -1])) + + if direction_offset == 1: + if sequence_lens is not None: + seq_lens_for_reverse = sequence_lens 
+ else: + input_shape = pred_mh.net.Shape(input_blob, + name + '/input_shape') + batch_size = pred_mh.net.Slice(input_shape, + name + '/batch_size_slice', + starts=[1], + ends=[2]) + seq_len = pred_mh.net.Slice(input_shape, + name + '/seq_len_slice', + starts=[0], + ends=[1]) + dummy_sequence_lens = pred_mh.net.Tile([seq_len, batch_size], + name + + '/dummy_sequence_lens', + axis=0) + pred_mh.net.Reshape( + dummy_sequence_lens, + [dummy_sequence_lens, cls.dummy_name()], + shape=[-1]) + seq_lens_for_reverse = pred_mh.net.Cast(dummy_sequence_lens, + name + + '/seq_lens_for_reverse', + to=core.DataType.INT32) + reform(Bi, Br, W_, R_, name, hidden_size, init_net) + + if direction_offset == 1: + input = pred_mh.net.ReversePackedSegs( + [input_blob, seq_lens_for_reverse], name + "/input-reversed") + else: + input = input_blob + + outputs = keep_outputs( + list( + make_cell( + pred_mh, + input, + sequence_lens, + initial_states_sliced, + input_size, + hidden_size, + name, + drop_states=False, + forward_only=True, + ))) + + if direction_offset == 1: + outputs[0] = pred_mh.net.ReversePackedSegs( + [outputs[0], seq_lens_for_reverse], name + "/output-reversed") + + return outputs + + @classmethod + def _create_rnn_variant(cls, init_model, pred_model, n, opset_version): + assert init_model is not None, "cannot convert RNNs without access to the full model" + assert pred_model is not None, "cannot convert RNNs without access to the full model" + + attrs = dict(n.attrs) # make a copy, which is safe to mutate + hidden_size = attrs.pop('hidden_size') + direction = force_unicode(attrs.pop('direction', 'forward')) + + if n.op_type == 'RNN': + activation = force_unicode( + attrs.pop('activations', ('tanh', ))[0].lower()) + elif n.op_type == 'GRU': + linear_before_reset = attrs.pop('linear_before_reset', 0) + + assert not attrs, "unsupported RNN attributes: " + str(attrs.keys()) + assert direction in ['forward', 'bidirectional' + ], "unsupported backwards RNN/GRU/LSTM" + + if n.op_type in ['RNN', 'GRU']: + input_blob, W, R, B, sequence_lens, initial_h = n.inputs + elif n.op_type == 'LSTM': + input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs + + if sequence_lens == "": + sequence_lens = None + + for x in itertools.chain(init_model.graph.input, + init_model.graph.value_info, + pred_model.graph.input, + pred_model.graph.value_info): + if x.name == W: + input_size = x.type.tensor_type.shape.dim[2].dim_value + break + else: + raise RuntimeError( + "best-effort shape inference for RNN/GRU/LSTM failed") + + pred_mh = ModelHelper() + init_net = core.Net("init-net") + + init_net.Reshape(W, [W, cls.dummy_name()], shape=[1, -1, 0]) + init_net.Squeeze(W, W, dims=[0]) + init_net.Reshape(R, [R, cls.dummy_name()], shape=[1, -1, 0]) + init_net.Squeeze(R, R, dims=[0]) + init_net.Reshape(B, [B, cls.dummy_name()], shape=[1, -1]) + init_net.Squeeze(B, B, dims=[0]) + + if n.op_type == 'RNN': + + def reform(*args): + pass + + def make_cell(*args, **kwargs): + return rnn_cell.BasicRNN(*args, activation=activation, **kwargs) + + def make_rnn(direction_offset): + return cls._make_rnn_direction( + input_blob, B, W, R, [(initial_h, '/initial_h')], + sequence_lens, pred_mh, init_net, input_size, hidden_size, + 1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w", + "/gates_t_w", reform, make_cell, lambda x: x) + + elif n.op_type == 'GRU': + + def reform(Bi, Br, W_, R_, name, hidden_size, init_net): + # caffe2 has a different order from onnx. 
We need to rearrange + # z r h -> r z h + reforms = ((W_, 'i2h_w', True, [(0, -1)]), (R_, 'gate_t_w', + False, [(0, -1)]), + (Bi, 'i2h_b', True, []), (Br, 'gate_t_b', False, [])) + cls._rnn_reform_weights(reforms, name, hidden_size, init_net, + ['update', 'reset', 'output'], + [1, 0, 2]) + + def make_cell(*args, **kwargs): + return gru_cell.GRU(*args, + linear_before_reset=linear_before_reset, + **kwargs) + + def make_rnn(direction_offset): + return cls._make_rnn_direction( + input_blob, B, W, R, [(initial_h, '/initial_h')], + sequence_lens, pred_mh, init_net, input_size, hidden_size, + 3, direction_offset, "_bias_i2h", "_bias_gates", + "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell, + lambda x: x) + + elif n.op_type == 'LSTM': + + def reform(Bi, Br, W_, R_, name, hidden_size, init_net): + # caffe2 has a different order from onnx. We need to rearrange + # i o f c -> i f o c + reforms = ((W_, 'i2h_w', True, [(0, -1)]), (R_, 'gates_t_w', + True, [(0, -1)]), + (Bi, 'i2h_b', True, []), (Br, 'gates_t_b', True, [])) + cls._rnn_reform_weights(reforms, name, hidden_size, init_net, + ['input', 'output', 'forget', 'cell'], + [0, 2, 1, 3]) + + def make_cell(*args, **kwargs): + return rnn_cell.LSTM(*args, **kwargs) + + def make_rnn(direction_offset): + return cls._make_rnn_direction( + input_blob, B, W, R, [(initial_h, '/initial_h'), + (initial_c, '/initial_c')], + sequence_lens, pred_mh, init_net, input_size, hidden_size, + 4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w", + "/gates_t_w", reform, make_cell, + lambda x: [x[0], x[1], x[3]]) + + if direction == 'forward': + outputs = make_rnn(0) + + # in the forward case, storage is shared between the + # last outputs. We need to decouple them so that the + # VariableLengthSequencePadding only mutates + # n.outputs[0] + for i in range(1, len(outputs)): + pred_mh.net.Copy(outputs[i], n.outputs[i]) + + if sequence_lens is not None: + pred_mh.net.VariableLengthSequencePadding( + [outputs[0], sequence_lens], [outputs[0]]) + pred_mh.net.ExpandDims([outputs[0]], [n.outputs[0]], dims=[1]) + elif direction == 'bidirectional': + outputs_f = make_rnn(0) + outputs_b = make_rnn(1) + + concatted_output, _ = pred_mh.net.Concat( + [outputs_f[0], outputs_b[0]], + [cls.dummy_name(), cls.dummy_name()], + axis=2) + if sequence_lens is not None: + pred_mh.net.VariableLengthSequencePadding( + [concatted_output, sequence_lens], [concatted_output]) + reshaped_output, _ = pred_mh.net.Reshape( + concatted_output, + [cls.dummy_name(), cls.dummy_name()], + shape=[0, 0, -1, 2]) + pred_mh.net.Transpose(reshaped_output, + n.outputs[0], + axes=[0, 2, 1, 3]) + for i in range(1, len(n.outputs)): + pred_mh.net.Concat( + [outputs_f[i], outputs_b[i]], + [n.outputs[i], cls.dummy_name()], + axis=0) + + # We want to decide whether to put all of our weight-reshaping + # operators in the init net or the predict net. We can put + # them in the init net iff the inputs to those operators are + # already available, either as graph initializers, or as the + # output of other operators in the init net. The latter case + # occurs, for example, when exporting from pytorch to onnx. + # In most production use, we expect has_initializers to be + # true. 
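+        # Names available after the init phase are the graph initializers
+        # plus every output produced by an init-model node; W, R and B must
+        # all be found there for the reshaping ops to live in the init net.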
+ initializers = {i.name for i in init_model.graph.initializer} + outputs = { + output + for node in init_model.graph.node for output in node.output + } + has_initializers = all(x in initializers or x in outputs + for x in (W, R, B)) + + pred_ops = [] + init_ops = [] + (init_ops if has_initializers else pred_ops).extend(init_net.Proto().op) + pred_ops.extend(pred_mh.Proto().op) + + return Caffe2Ops(pred_ops, init_ops, + list(pred_mh.Proto().external_input)) + + @classmethod + def _create_control_op(cls, init_model, pred_model, n, opset_version): + control_inputs = [] + if '__control_inputs' in n.attrs: + control_inputs.extend(n.attrs['__control_inputs']) + node = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, + opset_version) + node.control_input.extend(control_inputs) + return Caffe2Ops([node], [], []) + + @classmethod + def _remove_ssa(cls, net, remap_dict): + for op in net.op: + for i, name in enumerate(op.output): + if name in remap_dict: + op.output[i] = remap_dict[name] + for i, out in enumerate(net.external_output): + if out in remap_dict: + net.external_output[i] = remap_dict[out] + + @classmethod + def _create_if(cls, init_model, pred_model, n, opset_version): + ops = cls._create_control_op(init_model, pred_model, n, opset_version) + assert ops[0][0].type == 'If' + if_op = ops[0][0] + then_net = else_net = None + control_inputs = [] + for arg in if_op.arg: + if arg.name == 'then_net': + then_net = arg.n + if arg.name == 'else_net': + else_net = arg.n + if arg.name == '__control_inputs': + control_inputs = arg.strings + + assert then_net and else_net + then_net_outs = then_net.external_output + else_net_outs = else_net.external_output + op_outputs = if_op.output + assert len(then_net_outs) == len(else_net_outs) + assert len(else_net_outs) == len(op_outputs) + + for arg in if_op.arg: + if arg.name == 'then_net': + arg.n.external_input.extend(control_inputs) + if arg.name == 'else_net': + arg.n.external_input.extend(control_inputs) + + return ops + + @classmethod + def _create_loop(cls, init_model, pred_model, n, opset_version): + ops = cls._create_control_op(init_model, pred_model, n, opset_version) + assert ops[0][0].type == 'ONNXWhile' + while_op = ops[0][0] + while_op.arg.extend( + [caffe2.python.utils.MakeArgument('has_trip_count', True)]) + while_op.arg.extend( + [caffe2.python.utils.MakeArgument('has_cond', True)]) + while_op.arg.extend( + [caffe2.python.utils.MakeArgument('disable_scopes', True)]) + control_inputs = [] + for arg in while_op.arg: + if arg.name == '__control_inputs': + control_inputs = arg.strings + num_loop_carried_deps = 0 + for arg in while_op.arg: + if arg.name == 'body': + num_loop_carried_deps = len(arg.n.external_input) - 2 + arg.n.external_input.extend(control_inputs) + while_op.arg.extend([ + caffe2.python.utils.MakeArgument('num_loop_carried_deps', + num_loop_carried_deps) + ]) + + return ops + + @classmethod + def _substitute_raw_value(cls, tp, raw_values_dict): + if tp.HasField('raw_data') and tp.raw_data == bytes(b'__EXTERNAL'): + if tp.name not in raw_values_dict: + raise RuntimeError( + 'TensorProto for value {} referenced raw data but it was not found!' 
.format(tp.name))
+            else:
+                tp.raw_data = raw_values_dict[tp.name]
+
+    @classmethod
+    def _visit_and_substitute_raw_values(cls, nodes, raw_values_dict):
+        for node in nodes:
+            for attr in node.attribute:
+                if attr.HasField('t'):
+                    cls._substitute_raw_value(attr.t, raw_values_dict)
+                for t in attr.tensors:
+                    cls._substitute_raw_value(t, raw_values_dict)
+                if attr.HasField('g'):
+                    cls._visit_and_substitute_raw_values(
+                        attr.g.node, raw_values_dict)
+                for g in attr.graphs:
+                    cls._visit_and_substitute_raw_values(
+                        g.node, raw_values_dict)
+
+    @classmethod
+    def _external_value_resolution_pass(cls, model, raw_values_dict):
+        for init in model.graph.initializer:
+            cls._substitute_raw_value(init, raw_values_dict)
+
+        cls._visit_and_substitute_raw_values(model.graph.node, raw_values_dict)
+
+    @classmethod
+    def _direct_initialize_parameters(cls, initializer, ws, device_option):
+        for tp in initializer:
+            ws.FeedBlob(tp.name, onnx.numpy_helper.to_array(tp), device_option)
+
+    @classmethod
+    def _direct_initialize_inputs(cls, inputs, initialized, ws, device_option):
+        for value_info in inputs:
+            if value_info.name in initialized:
+                continue
+            shape = list(d.dim_value
+                         for d in value_info.type.tensor_type.shape.dim)
+            ws.FeedBlob(
+                value_info.name,
+                np.ones(shape,
+                        dtype=onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[
+                            value_info.type.tensor_type.elem_type]),
+                device_option)
+
+    @staticmethod
+    def optimize_onnx(input, init=False, predict=False):
+        passes = [
+            'fuse_consecutive_transposes', 'eliminate_nop_transpose',
+            'fuse_transpose_into_gemm', 'lift_lexical_references'
+        ]
+        if init:
+            passes.append('split_init')
+        if predict:
+            passes.append('split_predict')
+        out = onnx.optimizer.optimize(input, passes)
+        return out
+
+    @classmethod
+    def prepare_zip_archive(cls, file, device='CPU', **kwargs):
+        with zipfile.ZipFile(file, mode='r') as z:
+            with z.open('__MODEL_PROTO', 'r') as f:
+                model = onnx.load(f)
+            # NB: a one-element set literal; set('__MODEL_PROTO') would
+            # build a set of single characters instead.
+            blob_names = set(z.namelist()) - {'__MODEL_PROTO'}
+            # TODO: make this more efficient
+            raw_values_dict = {}
+            for name in blob_names:
+                with z.open(name, 'r') as blob_file:
+                    raw_values_dict[name] = blob_file.read()
+
+        return cls.prepare(model,
+                           device,
+                           raw_values_dict=raw_values_dict,
+                           **kwargs)
+
+    @classmethod
+    def prepare(cls, model, device='CPU', raw_values_dict=None, **kwargs):
+        '''
+        For Onnx Caffe2Backend, we require that init_graph doesn't initialize
+        the actual input of the predict_graph,
+
+        for example, if "img" is the input blob for the predict_net, we
+        require that in init_graph and in the initializer of the
+        predict_graph, "img" is not initialized. We don't have a check for
+        this, since there is no way we can know which blob is the input of
+        the predict_graph.
+        '''
+        if not kwargs.pop('no_check_UNSAFE', False):
+            super(Caffe2Backend, cls).prepare(model, device, **kwargs)
+        opset_version = None
+        for imp in model.opset_import:
+            if not imp.HasField("domain") or imp.domain == "":
+                opset_version = imp.version
+                if imp.version > cls._known_opset_version:
+                    warnings.warn(
+                        "This version of onnx-caffe2 targets ONNX operator set version {}, but the model we are trying to import uses version {}. We will try to import it anyway, but if the model uses operators which had BC-breaking changes in the intervening versions, import will fail."
+ .format(cls._known_opset_version, imp.version)) + else: + warnings.warn("Unrecognized operator set {}".format(imp.domain)) + if opset_version is None: + if model.ir_version >= 0x00000003: + raise RuntimeError( + "Model with IR version >= 3 did not specify ONNX operator set version (onnx-caffe2 requires it)" + ) + else: + opset_version = 1 + + ws = Workspace() + device_option = get_device_option(Device(device)) + + init_net, predict_net = cls._onnx_model_to_caffe2_net( + model, device, opset_version, False) + + if raw_values_dict: + cls._external_value_resolution_pass(model, raw_values_dict) + + # Directly load initializer data into blobs in workspace + cls._direct_initialize_parameters( + model.graph.initializer, + ws, + device_option, + ) + + initialized = {init.name for init in model.graph.initializer} + + cls._direct_initialize_inputs( + model.graph.input, + initialized, + ws, + device_option, + ) + + uninitialized = [ + value_info.name for value_info in model.graph.input + if value_info.name not in initialized + ] + + retval = Caffe2Rep(init_net, predict_net, ws, uninitialized) + return retval + + @classmethod + # TODO: This method needs a refactor for clarity + def _onnx_node_to_caffe2_op(cls, init_model, pred_model, node_def, + opset_version): + cbackend = C.Caffe2Backend(cls._dummy_name) + if cbackend.support_onnx_import(node_def.op_type): + + # extract value infos from pred model (value infos of + # node's inputs that are in init model should be all + # available in pred model) + value_infos = [] + for name in node_def.input: + if pred_model is not None: + for vi in itertools.chain(pred_model.graph.input, + pred_model.graph.output, + pred_model.graph.value_info): + if vi.name == name: + value_infos.append(vi.SerializeToString()) + + op_strs = cbackend.convert_node(node_def.SerializeToString(), + value_infos, opset_version) + init_ops = [] + for s in op_strs[0]: + op = caffe2_pb2.OperatorDef() + op.ParseFromString(s) + init_ops.append(op) + ops = [] + for s in op_strs[1]: + op = caffe2_pb2.OperatorDef() + op.ParseFromString(s) + ops.append(op) + return Caffe2Ops(ops, init_ops, []) + + if node_def.op_type in cls._special_operators: + translator = getattr(cls, cls._special_operators[node_def.op_type]) + else: + translator = cls._common_onnx_node_to_caffe2_op + ops = translator(init_model, pred_model, OnnxNode(node_def), + opset_version) + if isinstance(ops, Caffe2Ops): + return ops + if not isinstance(ops, container_abcs.Iterable): + ops = [ops] + return Caffe2Ops(ops, [], []) + + _broadcast_operators = { + 'Add', + 'Sub', + } + + @classmethod + def _common_onnx_node_to_caffe2_op(cls, init_model, pred_model, onnx_node, + opset_version): + """ + This translator performs the basic translation of ONNX nodes into + Caffe2 operators. Besides doing a straightforward marshalling from + one format to another, it also does these extra things: + + - Renames operators based on '_renamed_operators' + - Renames attributes based on '_global_renamed_attrs' and + '_per_op_renamed_attrs' + + If you're writing a custom translator, consider calling this first, + and then fixing things up further. 
+ """ + c2_op = caffe2_pb2.OperatorDef() + + c2_op.input.extend(onnx_node.inputs) + c2_op.output.extend(onnx_node.outputs) + c2_op.name = onnx_node.name + + onnx_op_type = onnx_node.op_type + broken_version = cls._broken_operators.get(onnx_op_type, float('Inf')) + if broken_version <= opset_version: + raise ValueError( + "Don't know how to translate op {} in ONNX operator set v{} (I only support prior to v{})" + .format(onnx_op_type, opset_version, broken_version)) + c2_op.type = cls._renamed_operators.get(onnx_op_type, onnx_op_type) + if not core.IsOperator(c2_op.type): + raise ValueError( + "Don't know how to translate op {}".format(onnx_op_type)) + + def kmap(k): + if (onnx_op_type in cls._per_op_renamed_attrs + and k in cls._per_op_renamed_attrs[onnx_op_type]): + return cls._per_op_renamed_attrs[onnx_op_type][k] + if k in cls._global_renamed_attrs: + return cls._global_renamed_attrs[k] + return k + + c2_op.arg.extend(onnx_node.attrs.caffe2(kmap=kmap)) + + if opset_version < 7: + # onnx opset 7 and newest caffe2 have adopted full onnx broadcast semantics + # so we don't need this hack anymore + if c2_op.type in cls._broadcast_operators: + already_broadcast = False + for arg in c2_op.arg: + if arg.name == 'broadcast': + already_broadcast = True + if not already_broadcast: + c2_op.arg.extend( + [caffe2.python.utils.MakeArgument('broadcast', 1)]) + + return c2_op + + @staticmethod + def _all_names_in_graph(graph): + if graph is None: + return set() + + names = set() + names.update(value_info.name for value_info in graph.input) + names.update(value_info.name for value_info in graph.output) + for node in graph.node: + names.update(node.input) + names.update(node.output) + return names + + @classmethod + def _graph_to_net(cls, onnx_graph, opset_version): + net = caffe2_pb2.NetDef() + for node in onnx_graph.node: + try: + c2ops = cls._onnx_node_to_caffe2_op(None, None, node, + opset_version) + except Exception as e: + print('ONNX FATAL:', e) + continue + net.op.extend(c2ops.init_ops) + net.op.extend(c2ops.ops) + net.external_input.extend(c2ops.interface_blobs) + net.external_output.extend(value_info.name + for value_info in onnx_graph.output) + net.external_input.extend(value_info.name + for value_info in onnx_graph.input) + return net + + @classmethod + def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, + include_initializers): + device_option = get_device_option(Device(device)) + + # init_model = cls.optimize_onnx(onnx_model, init=True) + # pred_model = cls.optimize_onnx(onnx_model, predict=True) + + init_model = onnx_model + pred_model = onnx_model + init_net = caffe2_pb2.NetDef() + pred_net = caffe2_pb2.NetDef() + + init_net.name = onnx_model.graph.name + '_init' + pred_net.name = onnx_model.graph.name + '_predict' + + if include_initializers: + init_net.op.extend( + cls._create_tensor_filling_op(tp) + for tp in onnx_model.graph.initializer) + + cls._dummy_name.reset( + cls._all_names_in_graph(init_model.graph) + | cls._all_names_in_graph(pred_model.graph)) + + success = True + for net, model in ((init_net, init_model), (pred_net, pred_model)): + net.device_option.CopyFrom(device_option) + for node in model.graph.node: + try: + c2ops = cls._onnx_node_to_caffe2_op(init_model, pred_model, + node, opset_version) + except Exception as e: + success = False + print('ONNX FATAL:', e) + continue + init_net.op.extend(c2ops.init_ops) + net.op.extend(c2ops.ops) + net.external_input.extend(c2ops.interface_blobs) + net.external_output.extend(value_info.name + for value_info in 
model.graph.output)
+            net.external_input.extend(value_info.name
+                                      for value_info in model.graph.input)
+
+        if not success:
+            raise RuntimeError('ONNX conversion failed')
+
+        return init_net, pred_net
+
+    # wrapper for backwards compatibility
+    @classmethod
+    def onnx_graph_to_caffe2_net(cls,
+                                 model,
+                                 device="CPU",
+                                 opset_version=_known_opset_version):
+        return cls._onnx_model_to_caffe2_net(model,
+                                             device=device,
+                                             opset_version=opset_version,
+                                             include_initializers=True)
+
+    @classmethod
+    def supports_device(cls, device_str):
+        device = Device(device_str)
+        if device.type == DeviceType.CPU:
+            return True
+        elif core.IsGPUDeviceType(device.type):
+            return workspace.has_gpu_support
+        return False
+
+    @classmethod
+    def is_compatible(cls, model, device='CPU', **kwargs):
+        if hasattr(super(Caffe2Backend, cls), 'is_compatible') \
+           and callable(super(Caffe2Backend, cls).is_compatible):
+            if not super(Caffe2Backend, cls).is_compatible(
+                    model, device, **kwargs):
+                return False
+        # TODO: should have an unsupported list of operators, be optimistic for now
+        return True
+
+
+prepare = Caffe2Backend.prepare
+
+prepare_zip_archive = Caffe2Backend.prepare_zip_archive
+
+run_node = Caffe2Backend.run_node
+
+run_model = Caffe2Backend.run_model
+
+supports_device = Caffe2Backend.supports_device  # noqa
+
+is_compatible = Caffe2Backend.is_compatible
diff --git a/x2paddle/decoder/onnx_decoder.py b/x2paddle/decoder/onnx_decoder.py
index 9db1420015e0c0f9374dd1a8ac4fe8198e32d60e..959b5b12aeaea4c6a84aca84b79bfe712423d9b5 100644
--- a/x2paddle/decoder/onnx_decoder.py
+++ b/x2paddle/decoder/onnx_decoder.py
@@ -23,6 +23,7 @@ from onnx.helper import get_attribute_value, make_attribute
 from onnx.shape_inference import infer_shapes
 from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
 from onnx.numpy_helper import to_array
+from onnx import AttributeProto, TensorProto, GraphProto
 from collections import OrderedDict as Dict
 import onnx
 import numpy as np
@@ -59,7 +60,6 @@ class ONNXGraphNode(GraphNode):
     @property
     def value(self):
         assert 'Constant' in self.layer_type, "Only Constant | ConstantOfShape node has value."
-        print(self.layer)
         attr = self.layer.attribute['value']
         if 'value' not in self.attr_map:
             return None
@@ -120,12 +120,15 @@ class ONNXGraph(Graph):
-    def __init__(self, model):
-        super(ONNXGraph, self).__init__(model)
+    def __init__(self, graph, onnx_model):
+        super(ONNXGraph, self).__init__(graph)
+        self.onnx_model = onnx_model
         self.initializer = {}
         self.place_holder_nodes = list()
         self.get_place_holder_nodes()
-        self.value_infos = self.inferred_model_value_info(model)
+
+        self.value_infos = self.inferred_model_value_info(graph)
+        self.results_of_inference = dict()
 
     def get_inner_nodes(self):
         """
@@ -162,13 +165,22 @@
         """
        build topo_sort of ONNX model
        """
+        data_node = self.place_holder_nodes[0]
+        value_info = self.value_infos[data_node]
+        input_shape = value_info['shape']
+        self.get_results_of_inference(self.onnx_model, input_shape)
         for layer in self.model.node:
             node = ONNXGraphNode(layer)
             self.node_map[layer.name] = node
             for opt in layer.output:
-                value_info = self.value_infos[opt]
-                node.dtype = value_info['dtype']
-                node.out_shapes.append(value_info['shape'])
+                if opt in self.value_infos:
+                    value_info = self.value_infos[opt]
+                    node.dtype = value_info['dtype']
+                    node.out_shapes.append(value_info['shape'])
+                else:
+                    _, dtype, shape = self.get_dynamic_shape(opt)
+                    node.dtype = dtype
+                    node.out_shapes.append(shape)
 
         for layer in self.model.input:
             if layer.name not in self.node_map:
@@ -195,10 +207,7 @@
                         format(in_node, layer_name))
                 else:
                     self.connect(in_node, layer_name)
-
-
-# print([layer_name for layer_name, node in self.node_map.items()])
-#generate topo
+        # generate topo
         super(ONNXGraph, self).build()
         self.input_nodes = self.place_holder_nodes
 
@@ -229,7 +238,6 @@
         """
         collect value/type info for an ONNX model
         """
-
         assert isinstance(graph,
                           onnx.GraphProto), 'model is not a ModelProto instance'
@@ -252,6 +260,7 @@
             'external': True
         }
         for item in graph.output:
+            assert item.name not in value_info
             value_info[item.name] = {
                 'dtype':
                 TENSOR_TYPE_TO_NP_TYPE[item.type.tensor_type.elem_type],
                 'shape':
                 [dim.dim_value for dim in item.type.tensor_type.shape.dim],
                 'external': True
             }
         return value_info
 
@@ -261,34 +270,74 @@
+    def get_results_of_inference(self, model, shape):
+        try:
+            import torch
+            version = torch.__version__
+            if '1.1.0' not in version:
+                print("your model has a dynamic graph; torch==1.1.0 is required")
+                return
+        except:
+            print(
+                "your model has a dynamic graph, so caffe2 (bundled with torch) is used to infer it; please run \"pip install torch==1.1.0\"."
+            )
+            return
+        from x2paddle.decoder.onnx_backend import prepare
+
+        np_images = np.random.rand(shape[0], shape[1], shape[2],
+                                   shape[3]).astype('float32')
+
+        outputs = []
+        for node in model.graph.node:
+            value_info = helper.make_tensor_value_info(node.name,
+                                                       TensorProto.UNDEFINED,
+                                                       [])
+            outputs.append(value_info)
+
+        # run inference on at most 254 outputs at a time
+        while len(outputs) > 0:
+            tmp_outputs = outputs[:254]
+            model.graph.ClearField('output')
+            model.graph.output.MergeFrom(tmp_outputs)
+            prepared_backend = prepare(model,
+                                       device='CPU',
+                                       no_check_UNSAFE=True)
+            res = prepared_backend.run(inputs=np_images)
+            for idx, info in enumerate(tmp_outputs):
+                self.results_of_inference[info.name] = res[idx]
+            outputs = outputs[254:]
+        return
+
+    def get_dynamic_shape(self, layer):
+        """
+        return the cached caffe2 inference result for `layer`:
+        (value as list, dtype, shape)
+        """
+        output = self.results_of_inference[layer]
+        return output.tolist(), output.dtype, output.shape
+
 
 class ONNXDecoder(object):
     def __init__(self, onnx_model):
         model = onnx.load(onnx_model)
         print('model ir_version: {}, op version: {}'.format(
             model.ir_version, model.opset_import[0].version))
-
         if model.opset_import[0].version < 9:
             _logger.warning(
                 'Now, onnx2paddle mainly supports converting onnx models with opset_version == 9; '
                 'the opset_version of your onnx model is %d < 9, '
                 'so some operators may fail to convert.',
                 model.opset_import[0].version)
-        check_model(model)
-        model = polish_model(model)
 
+        check_model(model)
+        model = onnx.shape_inference.infer_shapes(model)
         model = self.optimize_model_skip_op_for_inference(model)
         model = self.optimize_model_strip_initializer(model)
 
         self.standardize_variable_name(model.graph)
         self.model = model
         graph_def = model.graph
-
-        self.onnx_graph = ONNXGraph(graph_def)
+        self.onnx_graph = ONNXGraph(graph_def, model)
         self.onnx_graph.build()
 
-        self.results_of_inference = dict()
-
     def build_value_refs(self, nodes):
         """
         build op reference of inputs and outputs
@@ -369,9 +418,13 @@ class ONNXDecoder(object):
                                                     output_name, output_refs)
             else:
                 processed = -1
-
             if processed > 0:
                 nodes_to_remove.append(node_idx)
+                # iterate over a copy: we remove entries from the field below
+                for value_info in list(ret.graph.value_info):
+                    for output in node.output:
+                        if value_info.name == output:
+                            ret.graph.value_info.remove(value_info)
+
                 print('skip op {}: {} -> {} -> {}'.format(
                     node_idx, input_name, node.op_type, output_name))
             elif processed == 0:
@@ -431,7 +484,6 @@
         """
         standardize variable name for paddle's code
        """
-
         for initializer in graph.initializer:
             initializer.name = self.make_variable_name(initializer.name)
         for ipt in graph.input:
@@ -490,41 +542,3 @@
             raise RuntimeError("Input mismatch {} != {}".format(
                 len(onnx_model.input), len(model.input)))
         return onnx_model
-
-    def get_results_of_inference(self, model, input_shapes):
-        try:
-            import torch
-            version = torch.__version__
-            if '1.1.0' not in version:
-                print("your model have dynamic graph, torch==1.1.0 is required")
-                return
-        except:
-            print(
-                "your model have dynamic graph, we use caff2 to inference graph, please use \"pip install torch==1.1.0\"."
-            )
-            return
-        from caffe2.python.onnx.backend import prepare
-
-        shape = input_shapes[0]
-        np_images = np.random.rand(shape[0], shape[1], shape[2],
-                                   shape[3]).astype('float32')
-
-        infer_shapes = onnx.shape_inference.infer_shapes(model)
-        model.graph.ClearField('output')
-        model.graph.output.MergeFrom(infer_shapes.graph.value_info)
-
-        prepared_backend = prepare(model, device='CPU')
-        output = prepared_backend.run(inputs=np_images)
-
-        for idx, value_info in enumerate(infer_shapes.graph.value_info):
-            self.results_of_inference[value_info.name] = output[idx]
-        return
-
-    def get_dynamic_shape_from_caffe2(self, layer, input_shapes):
-        """
-        get dynamic shape from caffe2.backend
-        """
-        if len(self.results_of_inference) == 0:
-            self.get_results_of_inference(self.model, input_shapes)
-        output = self.results_of_inference[layer]
-        return output.tolist()
diff --git a/x2paddle/op_mapper/onnx_custom_layer/InstanceNormalization.py b/x2paddle/op_mapper/onnx_custom_layer/InstanceNormalization.py
index ccdb4d897ed589913aab17ab91aa8a21fed988a6..84a4bc3da942e023297364901440bda0e7982567 100644
--- a/x2paddle/op_mapper/onnx_custom_layer/InstanceNormalization.py
+++ b/x2paddle/op_mapper/onnx_custom_layer/InstanceNormalization.py
@@ -1,5 +1,4 @@
 from .register import register
-from x2paddle.core.util import *
 
 
 def InstanceNormalization_shape(input_shape):
diff --git a/x2paddle/op_mapper/onnx_custom_layer/__init__.py b/x2paddle/op_mapper/onnx_custom_layer/__init__.py
index b482be5a9c51a322874fa52671e27855c72e82e4..b12eafebbb6455a27e6191ca03ec5dfad6e2a978 100644
--- a/x2paddle/op_mapper/onnx_custom_layer/__init__.py
+++ b/x2paddle/op_mapper/onnx_custom_layer/__init__.py
@@ -1,5 +1,4 @@
 from .register import get_registered_layers
-
 #custom layer import begins
 
 from . import InstanceNormalization
diff --git a/x2paddle/op_mapper/onnx_directly_map.py b/x2paddle/op_mapper/onnx_directly_map.py
index a1a4f19ff3a21a2a603e85d77168990e8c0ddfb0..bf26dbd911c3fb824f7cd7308a9a743220e3b4a0 100644
--- a/x2paddle/op_mapper/onnx_directly_map.py
+++ b/x2paddle/op_mapper/onnx_directly_map.py
@@ -47,13 +47,42 @@ default_op_mapping = {
         dict(axes='dim', keepdims='keep_dim'),
         dict(keep_dim=1)
     ],
+    'ReduceSum': [
+        'reduce_sum', ['X'], ['Out'],
+        dict(axes='dim', keepdims='keep_dim'),
+        dict(keep_dim=1)
+    ],
+
+    # activation functions
+
+    'Relu': ['relu', ['X'], ['Out']],
     'LeakyRelu': ['leaky_relu', ['X'], ['Out'],
                   dict(), dict(alpha=.01)],
+    'Elu': ['elu', ['X'], ['Out'],
+            dict(), dict(alpha=1.)],
+    'ThresholdedRelu': [
+        'thresholded_relu', ['X'], ['Out'],
+        dict(alpha='threshold'),
+        dict(alpha=1.)
+ ], 'Tanh': ['tanh', ['X'], ['Out']], + 'Sigmoid': ['sigmoid', ['X'], ['Out']], + 'Pow': ['elementwise_pow', ['X', 'Y'], ['Out'], + dict(), + dict(axis=-1)], # TODO: pow for scalar exponent + 'HardSigmoid': [ + 'hard_sigmoid', ['X'], ['Out'], + dict(alpha='slope', beta='offset'), + dict(slope=.2, offset=.5) + ], + 'Softsign': ['softsign', ['X'], ['Out']], + 'Softplus': ['softplus', ['X'], ['Out']], + 'Exp': ['exp', ['X'], ['Out']], + 'Softmax': ['softmax', ['X'], ['Out'], + dict(axis=''), + dict(axis=1)], } activefunc_op_mapping = { - 'Relu': ['relu', ['X'], ['Out']], 'LeakyRelu': ['leaky_relu', ['X'], ['Out'], dict(), dict(alpha=.01)], } diff --git a/x2paddle/op_mapper/onnx_op_mapper.py b/x2paddle/op_mapper/onnx_op_mapper.py index 2e34c6ccd2dc50dd0b8b13233a4979db9228e7dc..bb4ab9a3e7884150c6b8e513d22316dc4717c55f 100644 --- a/x2paddle/op_mapper/onnx_op_mapper.py +++ b/x2paddle/op_mapper/onnx_op_mapper.py @@ -14,7 +14,6 @@ from x2paddle.core.graph import GraphNode from x2paddle.core.op_mapper import OpMapper -from x2paddle.core.util import * from x2paddle.core.fluid_code import Layer from x2paddle.core.fluid_code import FluidCode from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode @@ -22,6 +21,7 @@ from x2paddle.op_mapper.onnx_directly_map import default_op_mapping_field_values from x2paddle.op_mapper.onnx_directly_map import default_op_mapping from x2paddle.op_mapper.onnx_directly_map import default_ioa_constraint from x2paddle.op_mapper.onnx_custom_layer import * +from x2paddle.core.util import string import numpy as np import onnx.numpy_helper as numpy_helper import logging as _logging @@ -202,6 +202,48 @@ class ONNXOpMapper(OpMapper): val_padded = self.Pad(node, op_independent=False) return [0] * ndims, val_padded + def _interpolate(self, node): + val_x = self.graph.get_node(node.layer.input[0], copy=True) + val_scales = self.graph.get_node(node.layer.input[1], copy=True) + val_y = self.graph.get_node(node.layer.output[0], copy=True) + + out_shape_ = val_y.out_shapes[0] + if out_shape_ is not None: + assert len(out_shape_) == 4, 'only 4-D Tensor as X and Y supported' + out_shape_ = out_shape_[2:] + scales = _const_weight_or_none(val_scales) + if scales is not None: + assert len(scales) == 4, 'only 4-D Tensor as X and Y supported' + assert scales[0] == 1 and scales[ + 1] == 1, 'only scale on (NC)HW supported' + assert scales[2] == scales[ + 3], 'only aspect-ratio-invariant scale supported' + scale = scales[2] if scales else None + if scale is None: + assert out_shape_, 'neither scales nor output shape is available' + out_shape = out_shape_ + else: + out_shape = None + if out_shape_ is None: + in_shape = val_x.out_shapes[0] + assert in_shape is not None, 'out_shape required but not inferrable' + assert len( + in_shape) == 4, 'only 4-D Tensor as X and Y supported' + out_shape_ = [in_shape[2] * scale, in_shape[3] * scale] + + mode = node.get_attr('mode', 'nearest') + fluid_op = 'resize_{}'.format(mode) + + attr = { + 'scale': scale, + 'out_shape': out_shape, + 'name': string(node.layer_name) + } + node.fluid_code.add_layer(fluid_op, + inputs=val_x, + output=node, + param_attr=attr) + def Pad(self, node, op_independent=True): val_x = self.graph.get_node(node.layer.input[0], copy=True) pads = node.get_attr('pads') @@ -258,6 +300,17 @@ class ONNXOpMapper(OpMapper): output=node, param_attr=attr) + def Shrink(self, node): + val_x = self.graph.get_node(node.layer.input[0], copy=True) + bias = node.get_attr('bias') + lambd = node.get_attr('lambd') + assert bias 
== 0.0, 'bias != 0 is not supported'
+        attr = {'threshold': lambd, 'name': node.layer_name}
+        node.fluid_code.add_layer('hard_shrink',
+                                  inputs=val_x,
+                                  output=node,
+                                  param_attr=attr)
+
     def Constant(self, node):
         val_output = self.graph.get_node(node.layer.output[0], copy=True)
 
@@ -278,8 +331,8 @@
                 'using value as 1-D tensor may lead to fails',
                 val_output.layer_name, val_output.layer_name)
 
-        value = value.tolist()
         if len(value) == 1:  # scalar
+            value = value.tolist()
             shape = [1]
             value = value[0]
             if dtype.name == 'int64':
@@ -289,12 +342,25 @@
                                     inputs=None,
                                     output=node,
                                     param_attr=attr)
+        else:
+            value = np.reshape(value, shape)
+            self.weights[node.layer_name] = value
+            attr = {
+                'dtype': string(dtype),
+                'shape': shape,
+                'name': string(node.layer_name),
+                'attr': string(node.layer_name),
+                'default_initializer': 'Constant(0.0)'
+            }
+            node.fluid_code.add_layer("create_parameter",
+                                      inputs=None,
+                                      output=node,
+                                      param_attr=attr)
 
     def Resize(self, node):
-        # I/O
         val_x = self.graph.get_node(node.layer.input[0], copy=True)
         val_scales = self.graph.get_node(node.layer.input[1], copy=True)
-        val_y, = self.graph.get_node(node.layer.output[0], copy=True)
+        val_y = self.graph.get_node(node.layer.output[0], copy=True)
 
         out_shape_ = val_y.out_shapes[0]
         if out_shape_ is not None:
@@ -322,8 +388,6 @@
         mode = node.get_attr('mode', 'nearest')
         fluid_op = 'resize_{}'.format(mode)
 
-        name_attr = ', name={}'.format(repr(name)) if name else ''
-
         attr = {
             'scale': scale,
             'out_shape': out_shape,
@@ -334,6 +398,33 @@
                                   output=node,
                                   param_attr=attr)
 
+    def Upsample(self, node):
+        self._interpolate(node)
+
+    def Slice(self, node):
+        val_x = self.graph.get_node(node.layer.input[0], copy=True)
+        val_y = self.graph.get_node(node.layer.output[0], copy=True)
+
+        axes = node.get_attr('axes')
+        starts = node.get_attr('starts')
+        ends = node.get_attr('ends')
+        shape = val_x.out_shapes[0]
+
+        if shape is not None:
+            # (2**63 - 1) // 2: treat huge values as INT_MAX sentinels
+            for idx, value in enumerate(starts):
+                if value > (2**63 - 1) // 2:
+                    value = value - ONNX_INT_MAX
+                    starts[idx] = shape[axes[idx]] + value
+            for idx, value in enumerate(ends):
+                if value > (2**63 - 1) // 2:
+                    value = value - ONNX_INT_MAX
+                    ends[idx] = shape[axes[idx]] + value
+        attr = {"axes": axes, "starts": starts, "ends": ends}
+        node.fluid_code.add_layer('slice',
+                                  inputs=val_x,
+                                  output=node,
+                                  param_attr=attr)
+
     def ConstantOfShape(self, node):
         val_shape = self.graph.get_node(node.layer.input[0], copy=True)
         val_y = self.graph.get_node(node.layer.output[0], copy=True)
@@ -384,8 +475,8 @@
 
         # catch dynamic graph shape
         if isinstance(val_shape, ONNXGraphNode):
-            shape = self.decoder.get_dynamic_shape_from_caffe2(
-                val_shape.layer_name, self.input_shapes)
+            shape, _, _ = self.decoder.onnx_graph.get_dynamic_shape(
+                val_shape.layer_name)
 
         if shape is None:
             shape = val_reshaped.out_shapes[0]
@@ -440,9 +531,10 @@
         pads = node.get_attr('pads', [0] * (poolnd * 2))
         fluid_op = 'pool{}d'.format(poolnd)
         assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported'
-        paddings, val_x = self._pad_if_asymmetric(node, pads, val_x)
 
         input_shape = val_x.out_shapes[0]
+        paddings, val_x = self._pad_if_asymmetric(node, pads, val_x)
+
         if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER":
             pad_h = get_same_padding(input_shape[2], kernel_shape[0],
                                      strides[0])
@@ -597,14 +689,6 @@
                                   output=node,
                                   param_attr=attr)
 
-    def Softmax(self, node):
-        val_x =
self.graph.get_node(node.layer.input[0], copy=True) - attr = {"name": string(node.layer_name)} - node.fluid_code.add_layer("softmax", - inputs=val_x, - output=node, - param_attr=attr) - def Transpose(self, node): val_x = self.graph.get_node(node.layer.input[0], copy=True) perm = node.get_attr('perm') @@ -614,15 +698,79 @@ class ONNXOpMapper(OpMapper): output=node, param_attr=attr) + def Mul(self, node): + val_x = self.graph.get_node(node.layer.input[0], copy=True) + val_y = self.graph.get_node(node.layer.input[1], copy=True) + + val_x_shape = val_x.out_shapes[0] + val_y_shape = val_y.out_shapes[0] + + slice_idx = 0 + for dim in val_y_shape: + if dim == 1: + slice_idx += 1 + else: + break + attr = {"name": string(node.layer_name)} + if slice_idx < len(val_y_shape) and slice_idx > 0: + val_y_reshaped = val_y_shape[slice_idx:] + var_y_reshaped = val_y.layer_name + '_reshaped' + attr_reshaped = { + 'shape': val_y_reshaped, + 'name': string(var_y_reshaped) + } + node.fluid_code.add_layer('reshape', + inputs=val_y, + output=var_y_reshaped, + param_attr=attr_reshaped) + inputs = {'x': val_x, 'y': var_y_reshaped} + node.fluid_code.add_layer("elementwise_mul", + inputs=inputs, + output=node, + param_attr=attr) + else: + inputs = {'x': val_x, 'y': val_y} + node.fluid_code.add_layer("elementwise_mul", + inputs=inputs, + output=node, + param_attr=attr) + def Div(self, node): val_x = self.graph.get_node(node.layer.input[0], copy=True) val_y = self.graph.get_node(node.layer.input[1], copy=True) - inputs = {'x': val_x, 'y': val_y} + + val_x_shape = val_x.out_shapes[0] + val_y_shape = val_y.out_shapes[0] + + slice_idx = 0 + for dim in val_y_shape: + if dim == 1: + slice_idx += 1 + else: + break attr = {"name": string(node.layer_name)} - node.fluid_code.add_layer("elementwise_div", - inputs=inputs, - output=node, - param_attr=attr) + if slice_idx < len(val_y_shape) and slice_idx > 0: + val_y_reshaped = val_y_shape[slice_idx:] + var_y_reshaped = val_y.layer_name + '_reshaped' + attr_reshaped = { + 'shape': val_y_reshaped, + 'name': string(var_y_reshaped) + } + node.fluid_code.add_layer('reshape', + inputs=val_y, + output=var_y_reshaped, + param_attr=attr_reshaped) + inputs = {'x': val_x, 'y': var_y_reshaped} + node.fluid_code.add_layer("elementwise_div", + inputs=inputs, + output=node, + param_attr=attr) + else: + inputs = {'x': val_x, 'y': val_y} + node.fluid_code.add_layer("elementwise_div", + inputs=inputs, + output=node, + param_attr=attr) def Relu(self, node): val_x = self.graph.get_node(node.layer.input[0], copy=True) @@ -679,9 +827,10 @@ class ONNXOpMapper(OpMapper): pads = node.get_attr('pads', [0] * (poolnd * 2)) # optional fluid_op = 'pool{}d'.format(poolnd) assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported' - paddings, val_x = self._pad_if_asymmetric(node, pads, val_x) input_shape = val_x.out_shapes[0] + paddings, val_x = self._pad_if_asymmetric(node, pads, val_x) + if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER": pad_h = get_same_padding(input_shape[2], kernel_shape[0], strides[0]) @@ -731,7 +880,6 @@ class ONNXOpMapper(OpMapper): val_y = self.graph.get_node(node.layer.output[0], copy=True) self.omit_nodes.append(val_w.layer_name) - input_shape = val_x.out_shapes[0] has_bias = len(node.layer.input) == 3 if has_bias: @@ -752,6 +900,7 @@ class ONNXOpMapper(OpMapper): dilations = node.get_attr('dilations', [1] * convnd) # optional pads = node.get_attr('pads', [0] * (convnd * 2)) # optional + input_shape = val_x.out_shapes[0] paddings, val_x = self._pad_if_asymmetric(node, pads, 
val_x) if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER": @@ -796,14 +945,14 @@ class ONNXOpMapper(OpMapper): assert kernel_shape, 'kernel_shape not inferred' convnd = len(kernel_shape) assert 2 <= convnd <= 3, 'only conv2d_transpose and conv3d_transpose supported' - num_out_channels = val_w.out_shapes[0][1] # IO... + num_out_channels = val_w.out_shapes[0][1] fluid_op = 'conv{}d_transpose'.format(convnd) - num_groups = node.get_attr('group', 1) # optional - strides = node.get_attr('strides', [1] * convnd) # optional - dilations = node.get_attr('dilations', [1] * convnd) # optional - output_size = node.get_attr('output_shape', []) # optional - pads = node.get_attr('pads', [0] * (convnd * 2)) # optional + num_groups = node.get_attr('group', 1) + strides = node.get_attr('strides', [1] * convnd) + dilations = node.get_attr('dilations', [1] * convnd) + output_size = node.get_attr('output_shape', []) + pads = node.get_attr('pads', [0] * (convnd * 2)) paddings, var_x = self._pad_if_asymmetric(node, pads, val_x) @@ -831,3 +980,39 @@ class ONNXOpMapper(OpMapper): inputs=val_x, output=node, param_attr=attr) + + +# def NonMaxSuppression(self, node): +# boxes = self.graph.get_node(node.layer.input[0], copy=True) +# scores = self.graph.get_node(node.layer.input[1], copy=True) +# max_output_boxes_per_class = self.graph.get_node(node.layer.input[2], copy=True) +# iou_threshold = self.graph.get_node(node.layer.input[3], copy=True) +# score_threshold = self.graph.get_node(node.layer.input[4], copy=True) + +# self.omit_nodes.append(max_output_boxes_per_class) +# self.omit_nodes.append(iou_threshold) +# self.omit_nodes.append(score_threshold) + +# iou_threshold_val = iou_threshold.weight +# center_point_box = node.get_attr('center_point_box', 0) +# score_threshold_val = score_threshold.weight + +# attr = { +# 'num_filters': num_out_channels, +# 'output_size': output_size or None, +# 'filter_size': kernel_shape, +# 'padding': paddings, +# 'stride': strides, +# 'dilation': dilations, +# 'groups': num_groups, +# 'param_attr': string(val_w.layer_name), +# 'bias_attr': string(val_b.layer_name), +# 'name': string(node.layer_name), +# } + +# node.fluid_code.add_layer('multiclass_nms', +# inputs= boxes.layer_name ',' + scores.layer_name, +# output=node, +# param_attr=attr) + +# pass diff --git a/x2paddle/optimizer/onnx_optimizer.py b/x2paddle/optimizer/onnx_optimizer.py index 28ffd0fdca60b353eb2881418f5d5cd1c507b5da..a8f851b6c5ea6140c53b91b5d20a6bbf3aa3046f 100644 --- a/x2paddle/optimizer/onnx_optimizer.py +++ b/x2paddle/optimizer/onnx_optimizer.py @@ -14,7 +14,6 @@ # TODO useless node remove from x2paddle.op_mapper.onnx_op_mapper import ONNXOpMapper -from x2paddle.core.util import * class ONNXOptimizer(object):
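
---
Usage note: a minimal sketch of exercising the ONNX path after this change
(paths below are hypothetical; assumes onnx==1.5.0 is installed, and
torch==1.1.0 for models that need caffe2-based dynamic shape inference):

    from x2paddle.convert import onnx2paddle

    # converts the ONNX model and writes the Paddle inference model to save_dir
    onnx2paddle('model.onnx', 'saved_paddle_model')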