diff --git a/onnx2fluid/README.md b/onnx2fluid/README.md index 95e4ad41101a6280ef6137944e15027255321de5..946882437c1ce785650a6a25f2f487971bad0478 100644 --- a/onnx2fluid/README.md +++ b/onnx2fluid/README.md @@ -54,7 +54,7 @@ onnx2fluid sample_1.onnx -t sample_1.npz onnx2fluid: ```shell -onnx2fluid [-dexy] [-o /path/to/export_dir/] [-z archive.zip] [-t test_data.npz] /path/to/onnx/model.onnx +onnx2fluid [-dexy] [-o /path/to/export_dir/] [-z archive.zip] [-t test_data.npz] [-i [input_name1,input_name2]] /path/to/onnx/model.onnx optional arguments: --debug, -d 启用调试 @@ -65,6 +65,8 @@ optional arguments: --output_dir, -o 指定输出目录 --archive [ARCHIVE], -z [ARCHIVE] 如果验证通过,打包到指定的ZIP文件 + --infer_inputs, -i [input_name1,input_name2] + 调用PaddlePaddle fluid类形推导完善模型 ``` 转换工具onnx2fluid.conversion: @@ -76,7 +78,7 @@ onnx2fluid.conversion [-dexy] [-o /path/to/export_dir/] /path/to/onnx/model.onnx 验证工具onnx2fluid.validate: ```shell -onnx2fluid.validate [-d] [-t test_data.npz] [-p 1e-3] /path/to/onnx/model.onnx +onnx2fluid.validate [-d] [-t test_data.npz] [-i [input_name1,input_name2]] [-p 1e-3] /path/to/onnx/model.onnx ``` ## 参考 diff --git a/onnx2fluid/README_en.md b/onnx2fluid/README_en.md index 80259608d1737911da2d4ecad683d28897b57856..2a8782282a130b639d950ad3ac6ec176fab070cf 100644 --- a/onnx2fluid/README_en.md +++ b/onnx2fluid/README_en.md @@ -19,8 +19,8 @@ PyTorch to Paddlepaddle model conversion can be easily achieved with PyTorch ONN ## Environment and dependency * python 3.5+ (python 2 not fully supported yet) -* onnx == 1.4.0 -* paddlepaddle == 1.3.0 (optional for validation) +* onnx >= 1.4 +* paddlepaddle >= 1.3.0 (optional for validation) ## Get started @@ -47,10 +47,12 @@ onnx2fluid sample_unet.onnx -t sample_unet.npz ## Usage +**ONNX opset 9+** is mainly supported, corresponded to PyTorch **1.0/1.1(stable opset)**,for more information: [ONNX doc](https://github.com/onnx/onnx/blob/master/docs/Operators.md) + onnx2fluid (all in one): ```shell -onnx2fluid [-dexy] [-o /path/to/export_dir/] [-z archive.zip] [-t test_data.npz] /path/to/onnx/model.onnx +onnx2fluid [-dexy] [-o /path/to/export_dir/] [-z archive.zip] [-t test_data.npz] [-i [input_name1,input_name2]] /path/to/onnx/model.onnx optional arguments: --debug, -d enable debug logging and checking @@ -61,6 +63,8 @@ optional arguments: --output_dir, -o output directory --archive [ARCHIVE], -z [ARCHIVE] compress outputs to ZIP file if conversion successed + --infer_inputs, -i [input_name1,input_name2] + invoke PaddlePaddle fluid type-shape inference ``` onnx2fluid.conversion: @@ -72,10 +76,10 @@ onnx2fluid.conversion [-dexy] [-o /path/to/export_dir/] /path/to/onnx/model.onnx onnx2fluid.validate: ```shell -onnx2fluid.validate [-d] [-t test_data.npz] [-p 1e-3] /path/to/onnx/model.onnx +onnx2fluid.validate [-d] [-t test_data.npz] [-i [input_name1,input_name2]] [-p 1e-3] /path/to/onnx/model.onnx ``` ## Reference -* [PaddlePaddle fluid operators](http://www.paddlepaddle.org/documentation/docs/en/1.4/api/layers.html) -* load converted model via [load_inference_model](http://www.paddlepaddle.org/documentation/docs/en/1.4/api/io.html#permalink-1-load_inference_model) +* [PaddlePaddle fluid operators](http://www.paddlepaddle.org/documentation/docs/en/1.5/api/layers.html) +* load converted model via [load_inference_model](http://www.paddlepaddle.org/documentation/docs/en/1.5/api/io.html#permalink-1-load_inference_model) diff --git a/onnx2fluid/examples/convert_data_npz_0.py b/onnx2fluid/examples/convert_data_npz_0.py deleted file mode 100644 index 
bd23f527a2f33981496aff1569be0ecfa5b77141..0000000000000000000000000000000000000000 --- a/onnx2fluid/examples/convert_data_npz_0.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 27 11:50:03 2019 - -@author: Macrobull -""" - -import sys -import numpy as np - -from collections import OrderedDict as Dict - - -def _make_var_name(name): - """ - make a valid variable name in Python code - """ - - if name == '': - return '_' - if name[0].isdigit(): - return 'var_' + name - for s in ' *?\\/-:': - name = name.replace(s, '_') - if name.startswith('_'): - name = 'var' + name - return name - - -fn = sys.argv[1] -input_names = sys.argv[2].split(':') -output_name = sys.argv[3].split(':') -squeeze_data = len(sys.argv) > 4 - -data = np.load(fn, encoding='bytes') -input_data = data['inputs'] -output_data = data['outputs'] - -while squeeze_data and input_data.ndim > 4 and input_data.shape[0] == 1: - input_data = input_data.squeeze(0) -while squeeze_data and output_data.ndim > 2 and output_data.shape[0] == 1: - output_data = output_data.squeeze(0) - -inputs = Dict(zip(map(_make_var_name, input_names), [input_data])) -outputs = Dict(zip(map(_make_var_name, output_name), [output_data])) - -np.savez(fn, inputs=inputs, outputs=outputs) # overwrite diff --git a/onnx2fluid/examples/convert_data_pb_0.py b/onnx2fluid/examples/convert_data_pb_0.py deleted file mode 100644 index 78bac77ae5c8094edf7a833251ec878c776a18f5..0000000000000000000000000000000000000000 --- a/onnx2fluid/examples/convert_data_pb_0.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 27 11:50:03 2019 - -@author: Macrobull -""" - -import os, sys -import numpy as np -import onnx -import onnx.numpy_helper as numpy_helper - -from collections import OrderedDict as Dict -from glob import glob - - -def _make_var_name(name): - """ - make a valid variable name in Python code - """ - - if name == '': - return '_' - if name[0].isdigit(): - return 'var_' + name - for s in ' *?\\/-:': - name = name.replace(s, '_') - if name.startswith('_'): - name = 'var' + name - return name - - -data_dir = os.path.dirname(sys.argv[1]) -input_names = sys.argv[2].split(':') -output_name = sys.argv[3].split(':') -squeeze_data = len(sys.argv) > 4 - -# Load inputs -inputs = [] -for fn in glob(os.path.join(data_dir, 'input_*.pb')): - tensor = onnx.TensorProto() - with open(fn, 'rb') as f: - tensor.ParseFromString(f.read()) - tensor = numpy_helper.to_array(tensor) - while squeeze_data and tensor.ndim > 4 and tensor.shape[0] == 1: - tensor = tensor.squeeze(0) - inputs.append(tensor) - -# Load outputs -outputs = [] -for fn in glob(os.path.join(data_dir, 'output_*.pb')): - tensor = onnx.TensorProto() - with open(fn, 'rb') as f: - tensor.ParseFromString(f.read()) - tensor = numpy_helper.to_array(tensor) - while squeeze_data and tensor.ndim > 2 and tensor.shape[0] == 1: - tensor = tensor.squeeze(0) - outputs.append(tensor) - -inputs = Dict(zip(map(_make_var_name, input_names), inputs)) -outputs = Dict(zip(map(_make_var_name, output_name), outputs)) - -np.savez(data_dir, inputs=inputs, outputs=outputs) diff --git a/onnx2fluid/examples/gen_some_samples.py b/onnx2fluid/examples/gen_some_samples.py index 52aacefeba70ef9705f02e3c5ce37247693b43f3..01ec25facc518b1f1e78c69094d6d3db1d84b7a9 100644 --- a/onnx2fluid/examples/gen_some_samples.py +++ b/onnx2fluid/examples/gen_some_samples.py @@ -20,34 +20,74 @@ from onnx2fluid.torch_export_helper import export_onnx_with_validation prefix 
= 'sample_' idx = 0 +######## example: RNN cell ######## + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + self.gru = nn.GRUCell(6, 5) + self.lstm = nn.LSTMCell(5, 4) + + def forward(self, x, h1, h2, c2): + h = self.gru(x, h1) + h, c = self.lstm(h, (h2, c2)) + return h, c + + +model = Model() +model.eval() +xb = torch.rand((7, 6)) +h1 = torch.zeros((7, 5)) +h2 = torch.zeros((7, 4)) +c2 = torch.zeros((7, 4)) +yp = model(xb, h1, h2, c2) +idx += 1 +print('index: ', idx) +export_onnx_with_validation(model, [xb, h1, h2, c2], + prefix + str(idx), ['x', 'h1', 'h2', 'c2'], + ['h', 'c'], + verbose=True, + training=False) + ######## example: RNN ######## class Model(nn.Module): def __init__(self): super(Model, self).__init__() - self.gru = nn.GRU(4, 5, 3) - self.lstm = nn.LSTM(5, 6, 2) + self.gru = nn.GRU(6, 5, 3) + self.lstm = nn.LSTM(5, 4, 2) - def forward(self, x): - y = x - y, h = self.gru(y) - y, h = self.lstm(y) + def forward(self, x, h1, h2, c2): + y, h1 = self.gru(x, h1) + y, (h2, c2) = self.lstm(y, (h2, c2)) return y model = Model() model.eval() -xb = torch.rand((2, 3, 4)) -yp = model(xb) +xb = torch.rand((8, 1, 6)) +h1 = torch.zeros((3, 1, 5)) +h2 = torch.zeros((2, 1, 4)) +c2 = torch.zeros((2, 1, 4)) +yp = model(xb, h1, h2, c2) idx += 1 print('index: ', idx) -export_onnx_with_validation(model, [xb], - prefix + str(idx), ['x'], ['y'], +export_onnx_with_validation(model, [xb, h1, h2, c2], + prefix + str(idx), ['x', 'h1', 'h2', 'c2'], ['y'], verbose=True, training=False) ######## example: random ######## +""" + symbolic registration: + + def rand(g, *shapes): + shapes_list = list(shapes) + shape = _maybe_get_const(shapes_list[0], "is") + return g.op('RandomUniform', shape_i=shape) +""" class Model(nn.Module): @@ -55,8 +95,9 @@ class Model(nn.Module): super(Model, self).__init__() def forward(self, x): - y = torch.rand((2, 3)) # + torch.rand_like(xb) - y = y + torch.randn((2, 3)) # + torch.randn_like(xb) + y = torch.rand((2, 3)) # + torch.rand_like(x) + y = y + torch.randn((2, 3)) # + torch.randn_like(x) + y = y + x return y @@ -124,6 +165,13 @@ export_onnx_with_validation(model, [xb0, xb1], training=False) ######## example: affine_grid ######## +""" + symbolic registration: + + @parse_args('v', 'is') + def affine_grid_generator(g, theta, size): + return g.op('AffineGrid', theta, size_i=size) +""" class Model(nn.Module): diff --git a/onnx2fluid/onnx2fluid/cmdline.py b/onnx2fluid/onnx2fluid/cmdline.py index 4294490224a4762301d102a2b6b45970cbb5fcf5..14753ace9d3af3e71ce3a145a663cfc6028e8077 100644 --- a/onnx2fluid/onnx2fluid/cmdline.py +++ b/onnx2fluid/onnx2fluid/cmdline.py @@ -61,7 +61,7 @@ def main(**kwargs): passed = True golden_data_filename = kwargs.pop('test_data', '') infer_inputs = kwargs.pop('infer_inputs', None) - if golden_data_filename or infer_inputs: + if golden_data_filename or infer_inputs is not None: from .validation import validate save_inference_model = infer_inputs is not None diff --git a/onnx2fluid/onnx2fluid/conversion.py b/onnx2fluid/onnx2fluid/conversion.py index a7fee9cd45bf8c1220bdcb520623bd64a8d147bd..336dac4fb2641db39b4ebb690f07d708b4755140 100644 --- a/onnx2fluid/onnx2fluid/conversion.py +++ b/onnx2fluid/onnx2fluid/conversion.py @@ -91,7 +91,7 @@ def convert(onnx_model_filename, # onnx model optimization logger.info('model has %d ops', len(onnx_model.graph.node)) logger.info('optimizing model ...') - onnx_model = polish_model(onnx_model) + onnx_model = polish_model(onnx_model, checking=onnx_opset_pedantic) # prepare filesystem 
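The `checking=onnx_opset_pedantic` switch added to the `polish_model` call above lets non-pedantic runs skip the ONNX checker for models whose opset the installed checker does not fully accept. For reference, a minimal sketch of that optimize-then-infer-shapes pipeline using only the public onnx 1.4-era API (`onnx.optimizer` was later split out into the separate `onnxoptimizer` package); the internal `onnx_utils.polish_model` further down in this patch additionally strips doc strings and runs its own extra passes:

```python
import onnx
from onnx import optimizer  # bundled with onnx 1.4.x


def polish(model, checking=True):
    """Roughly the pipeline polish_model applies: optional validation,
    the built-in inference-time passes, then shape inference."""
    if checking:
        onnx.checker.check_model(model)
    passes = [p for p in optimizer.get_available_passes()
              if not p.startswith('split_')]  # skip the graph-splitting passes
    model = optimizer.optimize(model, passes=passes)
    model = onnx.shape_inference.infer_shapes(model)
    if checking:
        onnx.checker.check_model(model)
    return model
```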
shutil.rmtree(save_dir, ignore_errors=True) @@ -123,6 +123,7 @@ def convert(onnx_model_filename, for name, weight in graph_weights(onnx_graph): var_name = make_var_name(name) value_info = value_infos[var_name] + value_info['lod'] = [0] value_info['embedded_as'] = [] value_info['get_weight'] = (lambda w: lambda: w.tolist())( weight) # lazy getter @@ -134,8 +135,8 @@ def convert(onnx_model_filename, for name, domain, op_type, inputs, outputs, attrs in graph_ops(onnx_graph, topo=topo): op_name = make_var_name(name) - inputs = [make_var_name(val) for val in inputs] - outputs = [make_var_name(val) for val in outputs] + inputs = list(map(make_var_name, inputs)) + outputs = list(map(make_var_name, outputs)) logger.debug('translating op %s(%s) %s::%s ...', name, op_name, domain, op_type) if domain == DEFAULT_OP_DOMAIN: @@ -192,13 +193,16 @@ def convert(onnx_model_filename, weight.dtype, weight.size, weight.nbytes, embedded_names) for embedded_name in embedded_names: # multiple references - fluid_writer.write_weight( - weight, shutil.os.path.join(save_dir, embedded_name)) + fluid_writer.write_weight(weight, + shutil.os.path.join( + save_dir, embedded_name), + lod=value_info['lod']) else: logger.debug('saving weight %s(%s[%d], %dB) to %s ...', name, weight.dtype, weight.size, weight.nbytes, var_name) fluid_writer.write_weight(weight, - shutil.os.path.join(save_dir, var_name)) + shutil.os.path.join(save_dir, var_name), + lod=value_info['lod']) fluid_writer.emit_param(fluid_program, var_name, value_info) param_codes = fluid_program.codes fluid_program.codes = [] diff --git a/onnx2fluid/onnx2fluid/onnx_utils.py b/onnx2fluid/onnx2fluid/onnx_utils.py index e8b5a472e312439f97606c02e5cc4f7f657ce4c2..f738a537b5225c16868c3e5bb3f550fb0ea971fb 100644 --- a/onnx2fluid/onnx2fluid/onnx_utils.py +++ b/onnx2fluid/onnx2fluid/onnx_utils.py @@ -319,17 +319,20 @@ def skip_node_backward(nodes, src_input_name, dst_output_name, output_refs): return processed -def polish_model(model, extras=True): +def polish_model(model, internals=True, extras=True, checking=True): """ polish_model enhanced for inference """ - check_model(model) + if checking: + check_model(model) strip_doc_string(model) - passes = optimizer.get_available_passes() - passes = list(filter(lambda name: not name.startswith('split_'), passes)) # - logger.debug('builtin optimizations to perform in ONNX:\n\t%s', passes) - model = optimizer.optimize(model, passes=passes) + if internals: + passes = optimizer.get_available_passes() + passes = list(filter(lambda name: not name.startswith('split_'), + passes)) # + logger.debug('builtin optimizations to perform in ONNX:\n\t%s', passes) + model = optimizer.optimize(model, passes=passes) if extras: for optimize in ( optimize_model_skip_op_for_inference, @@ -339,7 +342,8 @@ def polish_model(model, extras=True): ): model = optimize(model) model = infer_shapes(model) - check_model(model) + if checking: + check_model(model) return model diff --git a/onnx2fluid/onnx2fluid/symbolic.py b/onnx2fluid/onnx2fluid/symbolic.py index 139d3bcef96159e275373def886ae7e4f37b3c7f..d5f1167be9135c81642c871983e6b3259a8e55d9 100644 --- a/onnx2fluid/onnx2fluid/symbolic.py +++ b/onnx2fluid/onnx2fluid/symbolic.py @@ -44,10 +44,10 @@ DEFAULT_OP_MAPPING = { ## nil ops ## 'RandomUniform': ['uniform_random', [], ['Out'], dict(high='max', low='min'), - dict(), None, None, False], + dict(), None, None, False], # TODO: add dtype support 'RandomNormal': ['gaussian_random', [], ['Out'], dict(scale='std'), - dict(), None, None, False], + dict(), None, 
None, False], # TODO: add dtype support ## unary ops ## 'Abs': ['abs', ['X'], ['Out']], 'Acos': ['acos', ['X'], ['Out']], @@ -63,21 +63,27 @@ DEFAULT_OP_MAPPING = { 'Flatten': ['flatten', ['X'], ['Out']], # attrs bypassed, FIXME: emit flatten2 'Floor': ['floor', ['X'], ['Out']], 'Gather': ['gather', ['X'], ['Out'], dict(axis='')], + 'HardSigmoid': ['hard_sigmoid', ['X'], ['Out'], dict(alpha='slope', beta='offset')], + 'Identity': ['assign', ['X'], ['Out']], 'LeakyRelu': ['leaky_relu', ['X'], ['Out']], 'Log': ['log', ['X'], ['Out']], 'LRN': ['lrn', ['X'], ['Out', 'MidOut'], dict(size='n', bias='k')], # 'Reciprocal': ['reciprocal', ['X'], ['Out']], 'Relu': ['relu', ['X'], ['Out']], + 'Round': ['round', ['X'], ['Out']], 'Selu': ['selu', ['X'], ['Out'], dict(gamma='scale')], 'Shape': ['shape', ['X'], ['Out']], # FIXME: out is int64 vs int32 'Shrink': ['softshrink', ['X'], ['Out'], dict(bias='', labmd='')], 'Sigmoid': ['sigmoid', ['X'], ['Out']], + 'Sign': ['sign', ['X'], ['Out']], 'Sin': ['sin', ['X'], ['Out']], 'Squeeze': ['squeeze', ['X'], ['Out']], # attrs bypassed, FIXME: emit squeeze2 'Softplus': ['softplus', ['X'], ['Out']], # FIXME: default axis = -1, reshape required before and after 'Softmax': ['softmax', ['X'], ['Out'], dict(axis='')], + 'Softplus': ['softplus', ['X'], ['Out']], 'Softsign': ['softsign', ['X'], ['Out']], + 'SpaceToDepth': ['space_to_depth', ['X'], ['Out']], 'Sqrt': ['sqrt', ['X'], ['Out']], 'Tanh': ['tanh', ['X'], ['Out']], 'ThresholdedRelu': ['thresholded_relu', ['X'], ['Out'], dict(alpha='threshold')], @@ -94,6 +100,7 @@ DEFAULT_OP_MAPPING = { 'MatMul': ['matmul', ['X', 'Y'], ['Out']], # defaults excluded for transpose_x vs transpose_X 'Max': ['elementwise_max', ['X', 'Y'], ['Out'], dict(), dict(axis=-1)], 'Min': ['elementwise_min', ['X', 'Y'], ['Out'], dict(), dict(axis=-1)], + 'Mod': ['elementwise_mod', ['X', 'Y'], ['Out'], dict(), dict(axis=-1)], 'Mul': ['elementwise_mul', ['X', 'Y'], ['Out'], dict(), dict(axis=-1)], 'Not': ['logical_not', ['X', 'Y'], ['Out']], 'OneHot': # assuming values=[0, 1], axis=-1 and drop them @@ -117,32 +124,30 @@ DEFAULT_OP_MAPPING = { DEFAULT_IOA_CONSTRAINTS = { 'ArgMax': [ (lambda i, o, a: a.get('keepdims', 1) == 1, - 'only keepdims = 0 is supported'), + 'only keepdims = 0 supported'), ], 'ArgMin': [ (lambda i, o, a: a.get('keepdims', 1) == 1, - 'only keepdims = 0 is supported'), + 'only keepdims = 0 supported'), ], 'Gather': [ - (lambda i, o, a: a.get('axis', 0) == 0, 'only axis = 0 is supported'), + (lambda i, o, a: a.get('axis', 0) == 0, 'only axis = 0 supported'), ], 'Shrink': [ (lambda i, o, a: a.get('bias', 0) == a.get('lambd', 0.5), - 'only SoftShrink with bias = lambd is supported'), + 'only SoftShrink with bias = lambd supported'), ], # 'Softmax': # [(lambda i, o, a: a.get('axis', 1) == -2, 'Paddle fluid Softmax works on dim -2 only'), # ], 'OneHot': [ - (lambda i, o, a: a.get('axis', -1) == -1, - 'only axis = -1 is supported'), + (lambda i, o, a: a.get('axis', -1) == -1, 'only axis = -1 supported'), ], 'Scatter': [ - (lambda i, o, a: a.get('axis', 0) == 0, 'only axis = 0 is supported'), + (lambda i, o, a: a.get('axis', 0) == 0, 'only axis = 0 supported'), ], 'TopK': [ - (lambda i, o, a: a.get('axis', -1) == -1, - 'only axis = -1 is supported'), + (lambda i, o, a: a.get('axis', -1) == -1, 'only axis = -1 supported'), ], } @@ -226,10 +231,10 @@ def _default(prog, op_type, inputs, outputs, attrs, *args, name='', **kwargs): fluid_attrs = default_attrs.copy() fluid_attrs.update(mapped_attrs) # as new attrs - var_inps = inputs if 
input_perm is None else list( - map(inputs.__getitem__, input_perm)) - var_outs = outputs if output_perm is None else list( - map(outputs.__getitem__, output_perm)) + var_inps = list(map(inputs.__getitem__, + input_perm)) if input_perm else inputs + var_outs = list(map(outputs.__getitem__, + output_perm)) if output_perm else outputs arg_name = ', name={}'.format( repr(name)) if fill_name_field and name else '' arg_attrs = [ @@ -240,7 +245,7 @@ def _default(prog, op_type, inputs, outputs, attrs, *args, name='', **kwargs): ', '.join(var_outs), fluid_op, ', '.join(var_inps), - ''.join(arg_attrs), + ''.join(arg_attrs)[(0 if var_inps else 2):], arg_name, )) @@ -255,8 +260,8 @@ def _default(prog, op_type, inputs, outputs, attrs, *args, name='', **kwargs): for var_out in var_outs: prog.VarDesc(var_out) - prog.OpDesc(fluid_op, (var_inps, *fluid_input_args), - (var_outs, *fluid_output_args), fluid_attrs) + prog.OpDesc(fluid_op, (fluid_input_args, var_inps), + (fluid_output_args, var_outs), fluid_attrs) def _assign(prog, mapping): @@ -272,8 +277,8 @@ def _assign(prog, mapping): prog.VarDesc(var_dst) prog.OpDesc( fluid_op, - ([var_src], 'X'), - ([var_dst], 'Out'), + (['X'], [var_src]), + (['Out'], [var_dst]), dict(), ) @@ -289,7 +294,7 @@ def _zeros_like(prog, var_ref, var_out, value_infos): ) -def _pad_if_asymmetric(prog, pads, var_name, value_infos): # pads: SSEE +def _pad_if_asymmetric(prog, pads, var_input, value_infos): # pads: SSEE assert len(pads) & 1 == 0 ndims = len(pads) // 2 symmetric = True @@ -298,13 +303,13 @@ def _pad_if_asymmetric(prog, pads, var_name, value_infos): # pads: SSEE symmetric = False break if symmetric: - return pads[:ndims], var_name + return pads[:ndims], var_input - var_padded = var_name + '_padded' # explicit variable + var_padded = var_input + '_padded' # explicit variable prog.Op( '', 'Pad', - [var_name], + [var_input], [var_padded], { 'mode': 'constant', @@ -312,7 +317,7 @@ def _pad_if_asymmetric(prog, pads, var_name, value_infos): # pads: SSEE 'pads': pads, }, value_infos=value_infos, - name=var_padded, + name=(var_input + '_pad'), ) return [0] * ndims, var_padded @@ -320,12 +325,12 @@ def _pad_if_asymmetric(prog, pads, var_name, value_infos): # pads: SSEE def _adaptive_pool(prog, pool_type, inputs, outputs, attrs, name=''): # I/O var_x, = inputs - var_y, var_indices = (outputs + [None] * 1)[:2] + var_y, var_indices, = (outputs + [None] * 1)[:2] # interpretation pool_size = attrs['output_size'] # required poolnd = len(pool_size) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported' + assert 2 <= poolnd <= 3, 'only pool2d and pool3d supported' fluid_op = 'adaptive_pool{}d'.format(poolnd) name_attr = ', name={}'.format(repr(name)) if name else '' @@ -352,15 +357,16 @@ def _adaptive_pool(prog, pool_type, inputs, outputs, attrs, name=''): prog.VarDesc(var_indices) prog.OpDesc( fluid_op, - ([var_x], 'X'), - ([var_y] + ([var_indices] if var_indices else []), 'Out', 'Indices'), + (['X'], [var_x]), + (['Out', 'Indices'], [var_y] + ([var_indices] if var_indices else [])), { 'global_pooling': False, 'adaptive': True, - 'exclusive': True, 'require_index': bool(var_indices), 'pooling_type': pool_type, 'ksize': pool_size, + # unused + # 'exclusive': True, }, ) @@ -378,7 +384,7 @@ def _global_pool(prog, pool_type, inputs, outputs, attrs, value_infos, name=''): poolnd = len(input_shape) - 2 # NC... elif output_shape is not None: poolnd = len(output_shape) - 2 # NC... 
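The `_pad_if_asymmetric` changes above deal with ONNX's SSEE pads layout (all begin pads first, then all end pads), while fluid conv/pool ops take a single pad value per spatial dimension. A self-contained sketch of that check, with a hypothetical helper name:

```python
def split_onnx_pads(pads):
    """ONNX pads are [x1_begin, x2_begin, ..., x1_end, x2_end, ...] (SSEE).
    Fluid conv/pool ops take one value per spatial dim, so asymmetric padding
    has to be emitted as a separate explicit Pad op, as _pad_if_asymmetric does."""
    assert len(pads) % 2 == 0
    ndims = len(pads) // 2
    begins, ends = pads[:ndims], pads[ndims:]
    if all(b == e for b, e in zip(begins, ends)):
        return begins, False  # symmetric: pads go straight to the fluid op
    return [0] * ndims, True  # asymmetric: emit an explicit Pad first, zero pads here


print(split_onnx_pads([1, 1, 1, 1]))  # ([1, 1], False)
print(split_onnx_pads([0, 1, 1, 1]))  # ([0, 0], True)
```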
- assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported' + assert 2 <= poolnd <= 3, 'only pool2d and pool3d supported' fluid_op = 'pool{}d'.format(poolnd) name_attr = ', name={}'.format(repr(name)) if name else '' @@ -397,13 +403,17 @@ def _global_pool(prog, pool_type, inputs, outputs, attrs, value_infos, name=''): prog.VarDesc(var_y) prog.OpDesc( fluid_op, - ([var_x], 'X'), - ([var_y], 'Out'), + (['X'], [var_x]), + (['Out'], [var_y]), { 'global_pooling': True, 'adaptive': False, 'pooling_type': pool_type, 'ksize': [-1, -1], + # unused + 'strides': [-1, -1], + 'paddings': [0, 0], + 'ceil_mode': False, }, ) @@ -411,15 +421,17 @@ def _global_pool(prog, pool_type, inputs, outputs, attrs, value_infos, name=''): def _pool(prog, pool_type, inputs, outputs, attrs, value_infos, name=''): # I/O var_x, = inputs - var_y, var_indices = (outputs + [None] * 1)[:2] + var_y, var_indices, = (outputs + [None] * 1)[:2] # interpretation assert attrs.get( 'auto_pad', - 'NOTSET') == 'NOTSET', 'only auto_pad = NOTSET is supported' # optional + 'NOTSET') == 'NOTSET', 'only auto_pad = NOTSET supported' # optional + assert attrs.get('count_include_pad', + 0) == 0, 'only count_include_pad = 0 supported' # optional pool_size = attrs['kernel_shape'] # required poolnd = len(pool_size) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported' + assert 2 <= poolnd <= 3, 'only pool2d and pool3d supported' fluid_op = 'pool{}d'.format(poolnd) strides = attrs.get('strides', [1] * poolnd) # optional @@ -452,25 +464,26 @@ def _pool(prog, pool_type, inputs, outputs, attrs, value_infos, name=''): prog.VarDesc(var_indices) prog.OpDesc( fluid_op, - ([var_x], 'X'), - ([var_y] + ([var_indices] if var_indices else []), 'Out', 'Indices'), + (['X'], [var_x]), + (['Out', 'Indices'], [var_y] + ([var_indices] if var_indices else [])), { 'global_pooling': False, 'adaptive': False, - 'exclusive': True, 'require_index': bool(var_indices), 'pooling_type': pool_type, 'ksize': pool_size, 'strides': strides, 'paddings': paddings, 'ceil_mode': ceil_mode, + # unused + 'exclusive': True, }, ) def _roi_pool(prog, fluid_op, inputs, outputs, attrs, value_infos, name): # I/O - var_x, var_rois = inputs + var_x, var_rois, = inputs var_y, = outputs # interpretation @@ -514,15 +527,15 @@ def _roi_pool(prog, fluid_op, inputs, outputs, attrs, value_infos, name): prog.VarDesc(var_argmax) prog.OpDesc( fluid_op, - ([var_x, var_rois], 'X', 'Rois'), - ([var_y] + ([var_argmax] if is_max_pool else []), 'Out', 'Argmax'), + (['X', 'Rois'], [var_x, var_rois]), + (['Out', 'Argmax'], [var_y] + ([var_argmax] if is_max_pool else [])), od_attrs, ) def _interpolate(prog, inputs, outputs, attrs, value_infos, name=''): # I/O - var_x, var_scales = inputs + var_x, var_scales, = inputs var_y, = outputs # interpretation @@ -542,7 +555,7 @@ def _interpolate(prog, inputs, outputs, attrs, value_infos, name=''): scale = scales and scales[2] # try input shape if scale is None: - assert out_shape_, 'neither scales nor output shape is available' + assert out_shape_, 'neither scales nor output shape available' out_shape = out_shape_ else: out_shape = None @@ -572,8 +585,8 @@ def _interpolate(prog, inputs, outputs, attrs, value_infos, name=''): prog.VarDesc(var_y) prog.OpDesc( fluid_op, - ([var_x], 'X'), - ([var_y], 'Out'), + (['X'], [var_x]), + (['Out'], [var_y]), { 'interp_method': mode, 'out_h ': out_shape_[0], @@ -626,8 +639,8 @@ def AffineGrid(prog, inputs, outputs, attrs, *args, name='', **kwargs): prog.VarDesc(var_grid) prog.OpDesc( fluid_op, - ([var_theta], 
'Theta'), - ([var_grid], 'Output'), + (['Theta'], [var_theta]), + (['Output'], [var_grid]), {'output_shape': size}, # f**k you API ) @@ -661,10 +674,14 @@ def BatchNormalization(prog, """ # I/O - var_x, var_scale, var_b, var_mean, var_var = inputs - var_y, = outputs - var_saved_mean = name + '.saved_mean' # dummy output - var_saved_variance = name + '.saved_variance' # dummy output + var_x, var_scale, var_b, var_mean, var_var, = inputs + var_y, var_mean_, var_var_, var_saved_mean, var_saved_variance, = ( + outputs + [None] * 4)[:5] + assert var_saved_mean or (name != '') + assert var_saved_variance or (name != '') + var_saved_mean = var_saved_mean or (name + '.saved_mean') # dummy output + var_saved_variance = var_saved_variance or (name + '.saved_variance' + ) # dummy output # interpretation fluid_op = 'batch_norm' @@ -700,7 +717,7 @@ def BatchNormalization(prog, repr(var_var)) # generation - prog.Code('{} = layers.{}({}, is_test=True, data_layout="NCHW"' + prog.Code('{} = layers.{}({}, is_test=True' ', momentum={}' ', epsilon={}' '{}{})'.format( @@ -718,16 +735,15 @@ def BatchNormalization(prog, prog.VarDesc(var_saved_variance) prog.OpDesc( fluid_op, - ([var_x, var_scale, var_b, var_mean, var_var - ], 'X', 'Scale', 'Bias', 'Mean', 'Variance'), - ([var_y, var_mean, var_saved_mean, var_saved_variance, var_var - ], 'Y', 'MeanOut', 'SavedMean', 'SavedVariance', 'VarianceOut'), + (['X', 'Scale', 'Bias', 'Mean', 'Variance' + ], [var_x, var_scale, var_b, var_mean, var_var]), + (['Y', 'MeanOut', 'SavedMean', 'SavedVariance', 'VarianceOut' + ], [var_y, var_mean, var_saved_mean, var_saved_variance, var_var]), { - 'is_test': 1, - 'data_layout': 'NCHW', - 'use_global_stats': False, 'momentum': momentum, 'epsilon': epsilon, + 'is_test': 1, + # unused }, ) @@ -745,9 +761,11 @@ def Cast(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): dtype = attrs['to'] # required if not isinstance(dtype, _np.dtype): # additional: possible np.dtype dtype = TENSOR_TYPE_TO_NP_TYPE[dtype] - output_dtype = _dtype_or_none(value_infos, var_output) - if output_dtype is not None: - assert dtype == output_dtype, 'dtype of to unmatches output' + + +# output_dtype = _dtype_or_none(value_infos, var_output) +# if output_dtype is not None: +# assert dtype == output_dtype, 'dtype of to unmatches output' fluid_op = 'cast' @@ -764,8 +782,8 @@ def Cast(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): prog.VarDesc(var_output) prog.OpDesc( fluid_op, - ([var_input], 'X'), - ([var_output], 'Out'), + (['X'], [var_input]), + (['Out'], [var_output]), { 'in_dtype': prog.Dtype(_dtype(value_infos, var_input)), # holy, required @@ -801,8 +819,8 @@ def Concat(prog, inputs, outputs, attrs, *args, name='', **kwargs): prog.VarDesc(var_ret) prog.OpDesc( fluid_op, - (inputs, *(['X'] * len(inputs))), - ([var_ret], 'Out'), + (['X'] * len(inputs), inputs), + (['Out'], [var_ret]), {'axis': axis}, ) @@ -819,13 +837,11 @@ def Constant(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): # interpretation value = attrs['value'] # required dtype = _np.dtype(value.dtype) - output_dtype = _dtype_or_none(value_infos, var_output) - if output_dtype is not None: - assert dtype == output_dtype, 'tensor dtype unmatches storage dtype' - - -# dtype = _np.dtype('float32') # HINT: force to float32 - shape = attrs.get('shape', None) # + # output_dtype = _dtype_or_none(value_infos, var_output) + # if output_dtype is not None: + # assert dtype == output_dtype, 'tensor dtype unmatches storage dtype' + # dtype = _np.dtype('float32') # HINT: force 
to float32 + shape = attrs.get('shape', None) # additional if shape is None: shape = _shape_or_none(value_infos, var_output) if shape is None: @@ -836,10 +852,9 @@ def Constant(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): 'using value as 1-D tensor may lead to fails', outputs, var_output) # generation - value = value.tolist() - if len(value) == 1: # scalar + if len(shape) == 0 or value.size == 1: # scalar or 1-size shape = [1] # WORKAROUND: bad scalar support - value = value[0] + value = value.tolist()[0] fluid_op = 'fill_constant' prog.Code('{} = layers.{}(shape={}, dtype={}, value={})'.format( var_output, @@ -852,8 +867,8 @@ def Constant(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): prog.VarDesc(var_output) prog.OpDesc( fluid_op, - ([], ), - ([var_output], 'Out'), + ([], []), + (['Out'], [var_output]), { 'shape': shape, 'dtype': prog.Dtype(dtype), @@ -862,7 +877,7 @@ def Constant(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): ) else: # list parameter -> const_value prog.Code('# {} = {} # passed directly as literal'.format( - var_output, value)) + var_output, value.tolist())) value_infos[var_output]['const_value'] = value @@ -882,16 +897,16 @@ def ConstantOfShape(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): assert shape is not None, ( 'given shape is neither const value nor deductible from output, ' 'this is not supported') - dtype = attrs['value'].dtype attrs = attrs.copy() - attrs.update({'shape': shape, 'dtype': dtype}) # pass const + attrs.setdefault('value', np.array(0, dtype=np.float32)) + attrs.update({'shape': shape}) # pass const prog.Code('# shape:{}={} # const as literal'.format(var_shape, shape)) prog.Op( '', 'Constant', [], - outputs, # val + outputs, attrs, value_infos, ) @@ -902,7 +917,7 @@ def Conv(prog, outputs, attrs, value_infos, - name='', + name, embed_params=False, *args, **kwargs): @@ -911,18 +926,18 @@ def Conv(prog, """ # I/O - var_x, var_w = inputs[:2] - var_y, var_b = (outputs + [None] * 1)[:2] + var_x, var_w, var_b, = (inputs + [None] * 1)[:3] + var_y, = outputs # interpretation assert attrs.get( - 'auto_pad', 'NOTSET' - ) == 'NOTSET', 'only auto_pad == NOTSET is supported' # optional - kernel_shape = _shape(value_infos, var_w)[2:] # OI... - assert kernel_shape == attrs[ - 'kernel_shape'], 'kernel_shape in attr unmatches value_info' # HW + 'auto_pad', + 'NOTSET') == 'NOTSET', 'only auto_pad = NOTSET supported' # optional + kernel_shape = attrs.get('kernel_shape', + _shape(value_infos, var_w)[2:]) # optional, HW + assert kernel_shape, 'kernel_shape not inferred' convnd = len(kernel_shape) - assert 2 <= convnd <= 3, 'only conv2d and conv3d is supported' + assert 2 <= convnd <= 3, 'only conv2d and conv3d supported' num_out_channels = _shape(value_infos, var_w)[0] # OI... 
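The kernel-shape fallback introduced just above works because ONNX `Conv` weights are laid out O x I/groups x kH x kW, so both the filter size and the output-channel count can be read off the weight shape when the attribute is absent. A quick illustration with made-up shapes:

```python
import numpy as np

w = np.zeros((64, 3, 7, 7), dtype=np.float32)  # O x I x kH x kW, e.g. a 7x7 stem filter
kernel_shape = list(w.shape[2:])               # [7, 7], the fallback when the attr is missing
num_out_channels = w.shape[0]                  # 64, read from the O axis
convnd = len(kernel_shape)                     # 2, emitted as fluid conv2d
assert 2 <= convnd <= 3
```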
fluid_op = 'conv{}d'.format(convnd) @@ -931,16 +946,15 @@ def Conv(prog, dilations = attrs.get('dilations', [1] * convnd) # optional pads = attrs.get('pads', [0] * (convnd * 2)) # optional paddings, var_x = _pad_if_asymmetric(prog, pads, var_x, value_infos) - name_attr = ', name={}'.format(repr(name)) if name else '' + name_attr = ', name={}'.format(repr(name)) if embed_params: - embed_params = (_check_embeddable(value_infos, var_w) and not var_b - or _check_embeddable(value_infos, var_b)) - if not embed_params and name: + embed_params = _check_embeddable( + value_infos, *([var_w] + ([var_b] if var_b else []))) + if not embed_params: _logger.warning('for op %s(%s -> Conv -> %s)', name, inputs, outputs) _logger.warning('broken Python code will be generated') if embed_params: - assert name != '' embedded_w = name + '.w_0' value_infos[var_w]['embedded_as'].append(embedded_w) var_w = embedded_w @@ -978,11 +992,11 @@ def Conv(prog, param_attr, name_attr, )) - var_conv = name + '.conv' # hidden variable + var_conv = (name + '.conv') if var_b else var_y # hidden variable prog.OpDesc( fluid_op, - ([var_x, var_w], 'Input', 'Filter'), # , 'Bias', 'ResidualData' - ([var_conv if var_b else var_y], 'Output'), + (['Input', 'Filter'], [var_x, var_w]), # , 'Bias', 'ResidualData' + (['Output'], [var_conv]), { 'strides': strides, 'paddings': paddings, @@ -1010,7 +1024,7 @@ def ConvTranspose(prog, outputs, attrs, value_infos, - name='', + name, embed_params=False, *args, **kwargs): @@ -1019,39 +1033,39 @@ def ConvTranspose(prog, """ # I/O - var_x, var_w = inputs[:2] - var_y, var_b = (outputs + [None] * 1)[:2] + var_x, var_w, var_b, = (inputs + [None] * 1)[:3] + var_y, = outputs # interpretation assert attrs.get( - 'auto_pad', 'NOTSET' - ) == 'NOTSET', 'only auto_pad == NOTSET is supported' # optional - assert sum(attrs.get( - 'output_padding', - [])) == 0, 'only zero output_padding is supported' # optional ? - kernel_shape = _shape(value_infos, var_w)[2:] # IO... - assert kernel_shape == attrs[ - 'kernel_shape'], 'kernel_shape in attr unmatches value_info' # HW + 'auto_pad', + 'NOTSET') == 'NOTSET', 'only auto_pad = NOTSET supported' # optional + assert sum( + attrs.get('output_padding', + [])) == 0, 'only zero output_padding supported' # optional ? + kernel_shape = attrs.get('kernel_shape', + _shape(value_infos, var_w)[2:]) # optional, HW + assert kernel_shape, 'kernel_shape not inferred' convnd = len(kernel_shape) - assert 2 <= convnd <= 3, 'only conv2d_transpose and conv3d_transpose is supported' + assert 2 <= convnd <= 3, 'only conv2d_transpose and conv3d_transpose supported' num_out_channels = _shape(value_infos, var_w)[1] # IO... 
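`ConvTranspose` weights use the opposite channel order, I x O/groups x kH x kW, which is why the line just above reads the output-channel count from axis 1 instead of axis 0. For contrast with the Conv example (again, illustrative shapes only):

```python
import numpy as np

w = np.zeros((256, 128, 4, 4), dtype=np.float32)  # I x O x kH x kW for ConvTranspose
kernel_shape = list(w.shape[2:])                   # [4, 4]
num_out_channels = w.shape[1]                      # 128, taken from the O axis (index 1)
```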
fluid_op = 'conv{}d_transpose'.format(convnd) num_groups = attrs.get('group', 1) # optional strides = attrs.get('strides', [1] * convnd) # optional dilations = attrs.get('dilations', [1] * convnd) # optional + output_size = attrs.get('output_shape', []) # optional pads = attrs.get('pads', [0] * (convnd * 2)) # optional paddings, var_x = _pad_if_asymmetric(prog, pads, var_x, value_infos) - name_attr = ', name={}'.format(repr(name)) if name else '' + name_attr = ', name={}'.format(repr(name)) if embed_params: - embed_params = (_check_embeddable(value_infos, var_w) and not var_b - or _check_embeddable(value_infos, var_b)) - if not embed_params and name: + embed_params = _check_embeddable( + value_infos, *([var_w] + ([var_b] if var_b else []))) + if not embed_params: _logger.warning('for op %s(%s -> ConvTranspose -> %s)', name, inputs, outputs) _logger.warning('broken Python code will be generated') if embed_params: - assert name != '' embedded_w = name + '.w_0' value_infos[var_w]['embedded_as'].append(embedded_w) var_w = embedded_w @@ -1070,7 +1084,7 @@ def ConvTranspose(prog, # generation prog.Code('{} = layers.{}({}' ', num_filters={}' - # ', output_size={}' + ', output_size={}' ', filter_size={}' ', padding={}' ', stride={}' @@ -1082,6 +1096,7 @@ def ConvTranspose(prog, var_x, # attrs num_out_channels, + output_size or None, kernel_shape, paddings, strides, @@ -1090,17 +1105,18 @@ def ConvTranspose(prog, param_attr, name_attr, )) - var_conv = name + '.conv' # hidden variable + var_conv = (name + '.conv') if var_b else var_y # hidden variable prog.OpDesc( fluid_op, - ([var_x, var_w], 'Input', 'Filter'), # , 'Bias', 'ResidualData' - ([var_conv if var_b else var_y], 'Output'), + (['Input', 'Filter'], [var_x, var_w]), # , 'Bias', 'ResidualData' + (['Output'], [var_conv]), { 'strides': strides, 'paddings': paddings, 'dilations': dilations, - # 'output_size': output_size, 'groups': num_groups, + # unused + 'output_size': output_size, }, ) if var_b: @@ -1124,7 +1140,7 @@ def Gemm(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): """ # due to fluid fc don't support transposed weight, we use matmul + ew_add - var_a, var_b, var_c = inputs + var_a, var_b, var_c, = inputs var_y, = outputs alpha = attrs.get('alpha', 1.) 
# optional @@ -1132,19 +1148,19 @@ def Gemm(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): trans_a = bool(attrs.get('transA', 0)) # optional trans_b = bool(attrs.get('transB', 0)) # optional - var_mm = name + '_mm' # explicit variable + var_mm = var_y if beta == 0 else (name + '_mmed') # explicit variable prog.Op( '', 'MatMul', [var_a, var_b], - [var_mm], # val + [var_mm], { 'transpose_x': trans_a, 'transpose_y': trans_b, 'alpha': alpha, }, value_infos=value_infos, - name=var_mm, + name=(name + '_mm'), ) prog.op_descs[-1].attrs.extend( prog.OpDescAttrs({ @@ -1157,10 +1173,10 @@ def Gemm(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): '', 'Add', [var_mm, var_c], - [var_y], # val + [var_y], {'axis': 1}, value_infos=value_infos, - name=(name + '_beta'), + name=(name + '_bias'), ) else: var_beta = name + '_beta' # explicit variable @@ -1179,7 +1195,7 @@ def Gemm(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): '', 'Constant', [], - [var_beta], # val + [var_beta], {'value': beta}, value_infos=value_infos, name=var_beta, @@ -1188,17 +1204,17 @@ def Gemm(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): '', 'Mul', [var_c, var_beta], - [var_vm], # val + [var_vm], dict(), value_infos=value_infos, - name=(name + '_scale'), + name=(var_beta + '_scale'), ) prog.Op( '', 'Add', [var_mm, var_vm], - [var_y], # val - {'axis': 1}, + [var_y], + {'axis': 1}, # name=(name + '_bias'), ) @@ -1250,57 +1266,441 @@ def GRU(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): onnx::GRU-7: """ - var_x, var_w, var_r, var_b, var_len, var_xh = (inputs + [None] * 3)[:6] - var_y, var_yh = (outputs + [None] * 2)[:2] + var_x, var_w, var_r, var_b, var_len, var_xh, = (inputs + [None] * 3)[:6] + var_y, var_yh, = (outputs + [None] * 2)[:2] + var_gate = var_y + '.gate' # dummy output + var_reset = var_y + '.reset' # dummy output + var_hidden = var_y + '.hidden' # dummy output, # var_yh # interpretation - fluid_op = 'gru_unit' - param_attr = '' + x_shape = _shape_or_none(value_infos, var_x) + assert x_shape is not None, 'shape of X required to be known' + assert x_shape[1] == 1, 'only X with batch_size = 1 supported' + assert 'clip' not in attrs, 'clipping not supported' + hidden_size = attrs.get('hidden_size', None) # optional + if not hidden_size: + r_shape = _shape_or_none(value_infos, var_r) + if r_shape: + hidden_size = r_shape[-1] + if not hidden_size: + w_shape = _shape_or_none(value_infos, var_w) + if w_shape: + hidden_size = w_shape[-2] // 3 + if not hidden_size and var_b: + b_shape = _shape_or_none(value_infos, var_b) + if b_shape: + hidden_size = b_shape[-1] // 6 + if not hidden_size and var_xh: + xh_shape = _shape_or_none(value_infos, var_xh) + if xh_shape: + hidden_size = xh_shape[-1] + assert hidden_size, 'hidden_size not inferred' + assert attrs.get( + 'linear_before_reset', + 0) == 0, 'only linear_before_reset = 0 supported' # optional + direction = attrs.get('direction', 'forward') # optional + assert direction != 'bidirectional', 'direction = bidirectional not supported' + activations = attrs.get('activations', ['Sigmoid', 'Tanh']) # optional + assert len(activations) == 2, 'bidirectional operation not supported' + activations = [s.lower() for s in activations] # TODO: check support + gate_activation, candidate_activation = activations + is_reverse = direction == 'reverse' + + fluid_op = 'dynamic_gru' # generation - prog.Code('{}, _, {} = layers.{}({}, {}, {}' - '{})'.format( - var_yh, - var_y, + var_x0 = var_x + '_0' # explicit 
variable + prog.Op( + '', + 'Squeeze', + [var_x], + [var_x0], + {'axes': [1]}, # index on n + name=(var_x + '_index'), + ) + var_w0 = var_w + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_w], + [var_w0], + {'axes': [0]}, # index on d + name=(var_w + '_index'), + ) + var_fc = var_x0 + '_fc' + var_mm = (var_x0 + '_mmed') if var_b else var_fc + prog.Op( + '', + 'MatMul', + [var_x0, var_w0], + [var_mm], + { + 'transpose_x': 0, + 'transpose_y': 1, + }, + value_infos=value_infos, + name=(var_x0 + '_mm'), + ) + prog.op_descs[-1].attrs.extend( + prog.OpDescAttrs({ + 'transpose_X': 0, + 'transpose_Y': 1, + })) # f**k you API + var_r0 = var_r + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_r], + [var_r0], + {'axes': [0]}, # index on d + name=(var_r + '_index'), + ) + var_r0t = var_r0 + '_t' # explicit variable + prog.Op( + '', + 'Transpose', + [var_r0], + [var_r0t], + {'perm': [1, 0]}, # transpose OI->IO + name=(var_r0 + '_transpose'), + ) + if var_b: + var_bi = var_b + '_i' # explicit variable + var_bh = var_b + '_h' # explicit variable + prog.Op( + '', + 'Split', + [var_b], + [var_bi, var_bh], + { + 'axis': 1, # split on x + 'split': [hidden_size * 3, hidden_size * 3], + }, + name=(var_b + '_split'), + ) + # squeeze bi so Gemm Add can be performed on axis=1 exaclty + var_bi0 = var_bi + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_bi], + [var_bi0], + {'axes': [0]}, # slice on d + name=(var_bi + '_index'), + ) + prog.Op( + '', + 'Add', + [var_mm, var_bi0], + [var_fc], + {'axis': 1}, # + name=(var_x0 + '_bias'), + ) + if var_xh: + var_xh0 = var_xh + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_xh], + [var_xh0], + {'axes': [1]}, # index on n + name=(var_xh + '_index'), + ) + var_y00 = var_y + '_00' # explicit variable + prog.Code('{} = layers.{}({}, {}, origin_mode=True' + ', h_0={}' + ', is_reverse={}' + ', gate_activation={}' + ', candidate_activation={}' + ', param_attr={}, bias_attr={})'.format( + var_y00, fluid_op, - var_x, - var_xh, - 0, - param_attr, + var_fc, + hidden_size, + var_xh0 if var_xh else None, + is_reverse, + repr(gate_activation), + repr(candidate_activation), + repr(var_r0t), + repr(var_bh) if var_b else False, )) - -# raise NotImplementedError() + fluid_op = 'gru' + prog.VarDesc(var_y00) + prog.VarDesc(var_gate) + prog.VarDesc(var_reset) + prog.VarDesc(var_hidden) + prog.OpDesc( + fluid_op, + (['Input', 'Weight', 'Bias', 'H0'], [var_fc, var_r0t] + + ([var_bh] if var_b else []) + ([var_xh0] if var_xh else [])), + (['Hidden', 'BatchGate', 'BatchResetHiddenPrev', 'BatchHidden' + ], [var_y00, var_gate, var_reset, var_hidden]), + { + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'activation': candidate_activation, + 'origin_mode': True, + }, + ) + prog.Op( + '', + 'Unsqueeze', + [var_y00], + [var_y], + {'axes': [1, 1]}, # extrude on dn + name=(var_y + '_reshape'), + ) -def LSTM(prog, inputs, outputs, attrs, value_infos, name='', *args, **kwargs): +def LSTM(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): """ onnx::LSTM-7: """ - var_x, var_w, var_r, var_b, var_len, var_xh, var_xc, var_p = ( + var_x, var_w, var_r, var_b, var_len, var_xh, var_xc, var_p, = ( inputs + [None] * 5)[:8] - var_y, var_yh, var_yc = (outputs + [None] * 3)[:3] + var_y, var_yh, var_yc, = (outputs + [None] * 3)[:3] + var_gate = name + '.gate' + var_pre = name + '.pre' # interpretation - fluid_op = 'lstm_unit' - param_attr = '' + x_shape = _shape_or_none(value_infos, var_x) + assert x_shape is not None, 'shape of X 
required to be known' + assert x_shape[1] == 1, 'only X with batch_size = 1 supported' + assert 'clip' not in attrs, 'clipping not supported' + hidden_size = attrs.get('hidden_size', None) # optional + if not hidden_size: + r_shape = _shape_or_none(value_infos, var_r) + if r_shape: + hidden_size = r_shape[-1] + if not hidden_size: + w_shape = _shape_or_none(value_infos, var_w) + if w_shape: + hidden_size = w_shape[-2] // 4 + if not hidden_size and var_b: + b_shape = _shape_or_none(value_infos, var_b) + if b_shape: + hidden_size = b_shape[-1] // 8 + if not hidden_size and var_xh: + xh_shape = _shape_or_none(value_infos, var_xh) + if xh_shape: + hidden_size = xh_shape[-1] + if not hidden_size and var_xc: + xc_shape = _shape_or_none(value_infos, var_xc) + if xc_shape: + hidden_size = xc_shape[-1] + if not hidden_size and var_p: + p_shape = _shape_or_none(value_infos, var_p) + if p_shape: + hidden_size = p_shape[-1] // 3 + assert hidden_size, 'hidden_size not inferred' + assert attrs.get( + 'linear_before_reset', + 0) == 0, 'only linear_before_reset = 0 supported' # optional + assert attrs.get('input_forget', + 0) == 0, 'only input_forget = 0 supported' # optional + direction = attrs.get('direction', 'forward') # optional + assert direction != 'bidirectional', 'direction = bidirectional not supported' + activations = attrs.get('activations', + ['Sigmoid', 'Tanh', 'Tanh']) # optional + assert len(activations) == 3, 'bidirectional operation not supported' + activations = [s.lower() for s in activations] # TODO: check support + gate_activation, cell_activation, candidate_activation = activations + is_reverse = direction == 'reverse' + + fluid_op = 'dynamic_lstm' + name_attr = ', name={}'.format(repr(name)) # generation - prog.Code('{}, {}, {} = layers.{}({}, {}, {}' + var_x0 = var_x + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_x], + [var_x0], + {'axes': [1]}, # index on n + name=(var_x + '_index'), + ) + var_w0 = var_w + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_w], + [var_w0], + {'axes': [0]}, # index on d + name=(var_w + '_index'), + ) + var_fc = var_x0 + '_fc' + var_mm = (var_x0 + '_mmed') if var_b else var_fc + prog.Op( + '', + 'MatMul', + [var_x0, var_w0], + [var_mm], + { + 'transpose_x': 0, + 'transpose_y': 1, + }, + value_infos=value_infos, + name=(name + '_mm'), + ) + prog.op_descs[-1].attrs.extend( + prog.OpDescAttrs({ + 'transpose_X': 0, + 'transpose_Y': 1, + })) # f**k you API + var_r0 = var_r + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_r], + [var_r0], + {'axes': [0]}, # index on d + name=(var_r + '_index'), + ) + var_r0t = var_r0 + '_t' # explicit variable + prog.Op( + '', + 'Transpose', + [var_r0], + [var_r0t], + {'perm': [1, 0]}, # transpose OI->IO + name=(var_r0 + '_transpose'), + ) + if var_b: + var_bi = var_b + '_i' # explicit variable + var_bh = var_b + '_h' # explicit variable + prog.Op( + '', + 'Split', + [var_b], + [var_bi, var_bh], + { + 'axis': 1, # split on x + 'split': [hidden_size * 4, hidden_size * 4], + }, + name=(var_b + '_split'), + ) + # squeeze bi so Gemm Add can be performed on axis=1 exaclty + var_bi0 = var_bi + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_bi], + [var_bi0], + {'axes': [0]}, # slice on d + name=(var_bi + '_index'), + ) + prog.Op( + '', + 'Add', + [var_mm, var_bi0], + [var_fc], + {'axis': 1}, # + name=(name + '_bias'), + ) + if var_xh: + var_xh0 = var_xh + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_xh], + [var_xh0], + {'axes': [1]}, # index on n + 
name=(var_xh + '_index'), + ) + if var_xc: + var_xc0 = var_xc + '_0' # explicit variable + prog.Op( + '', + 'Squeeze', + [var_xc], + [var_xc0], + {'axes': [1]}, # index on n + name=(var_xc + '_index'), + ) + var_bhp = var_p + if var_b: + if var_p: + var_bhp = var_bh + '_p' # explicit variable + prog.Op( + '', + 'Concat', + [var_bh, var_p], + [var_bhp], + {'axes': [1]}, # cat on x + name=(name + '_concat'), + ) + else: + var_bhp = var_bh + var_yh0 = var_yh + '_0' # explicit variable + var_yc0 = var_yc + '_0' # explicit variable + prog.Code('{}, {} = layers.{}({}, {}' + ', h_0={}' + ', c_0={}' + ', use_peepholes={}' + ', is_reverse={}' + ', gate_activation={}' + ', cell_activation={}' + ', candidate_activation={}' + ', param_attr={}, bias_attr={}' '{})'.format( - var_y, - var_yh, - var_yc, + var_yh0, + var_yc0, fluid_op, - var_x, - var_xh, - var_xc, - param_attr, + var_fc, + hidden_size * 4, + var_xh0 if var_xh else None, + var_xc0 if var_xc else None, + bool(var_p), + is_reverse, + repr(gate_activation), + repr(cell_activation), + repr(candidate_activation), + repr(var_r0t), + repr(var_bhp) if var_bhp else False, + name_attr, )) - -# raise NotImplementedError() + fluid_op = 'lstm' + prog.VarDesc(var_yh0) + prog.VarDesc(var_yc0) + prog.VarDesc(var_gate) + prog.VarDesc(var_pre) + prog.OpDesc( + fluid_op, + (['Input', 'Weight', 'Bias', 'H0', 'C0'], [var_fc, var_r0t] + + ([var_bhp] if var_bhp else []) + ([var_xh0] if var_xh else []) + + ([var_xc0] if var_xc else [])), + (['Hidden', 'Cell', 'BatchGate', 'BatchCellPreAct' + ], [var_yh0, var_yc0, var_gate, var_pre]), + { + 'use_peepholes': bool(var_p), + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'cell_activation': cell_activation, + 'candidate_activation': candidate_activation, + }, + ) + # if var_yh: + prog.Op( + '', + 'Unsqueeze', + [var_yh0], + [var_y], # var_yh + {'axes': [1, 1]}, # extrude on dn + name=(var_y + '_reshape'), + ) + if var_yc: + prog.Op( + '', + 'Unsqueeze', + [var_yc0], + [var_yc], + {'axes': [1, 1]}, # extrude on dn + name=(var_yc + '_reshape'), + ) def MaxPool(prog, inputs, outputs, attrs, value_infos, name='', *args, @@ -1350,7 +1750,7 @@ def Pad(prog, inputs, outputs, attrs, value_infos, name='', *args, **kwargs): od_attrs['mode'] = mode od_attrs['data_format'] = "NCHW" else: - assert mode == 'constant', 'mode {} is supported only in pad2d'.format( + assert mode == 'constant', 'mode {} supported only in pad2d'.format( mode) fluid_op = 'pad' pad2d_attr = '' @@ -1376,8 +1776,8 @@ def Pad(prog, inputs, outputs, attrs, value_infos, name='', *args, **kwargs): prog.VarDesc(var_output) prog.OpDesc( fluid_op, - ([var_data], 'X'), - ([var_output], 'Out'), + (['X'], [var_data]), + (['Out'], [var_output]), od_attrs, ) @@ -1396,7 +1796,7 @@ def PRelu(prog, """ # I/O - var_x, var_slope = inputs + var_x, var_slope, = inputs var_y, = outputs # interpretation @@ -1441,8 +1841,8 @@ def PRelu(prog, prog.VarDesc(var_y) prog.OpDesc( fluid_op, - ([var_x, var_slope], 'X', 'Alpha'), - ([var_y], 'Out'), + (['X', 'Alpha'], [var_x, var_slope]), + (['Out'], [var_y]), {'mode': mode}, ) @@ -1461,7 +1861,7 @@ def Reshape(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): """ # I/O - var_data, var_shape = inputs + var_data, var_shape, = inputs var_reshaped, = outputs # interpretation @@ -1481,7 +1881,7 @@ def Reshape(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): 'the behavior of Paddle fluid maybe undefined', name, inputs, outputs) fluid_op = 'reshape' - name_attr = ', 
name={}'.format(repr(name)) if name else '' + name_attr = ', name={}'.format(repr(name)) # generation var_shape_int32 = var_shape + '_int32' # explicit variable @@ -1502,7 +1902,7 @@ def Reshape(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): '', 'Cast', [var_shape], - [var_shape_int32], # var + [var_shape_int32], {'to': _np.dtype('int32')}, # use np.dtype value_infos=value_infos, name=(name + '_cast'), @@ -1525,8 +1925,8 @@ def Reshape(prog, inputs, outputs, attrs, value_infos, name, *args, **kwargs): prog.VarDesc(var_xshape) prog.OpDesc( fluid_op, - ([var_data, var_shape_int32], 'X', 'Shape'), - ([var_reshaped, var_xshape], 'Out', 'XShape'), + (['X', 'Shape'], [var_data, var_shape_int32]), + (['Out', 'XShape'], [var_reshaped, var_xshape]), {'shape': shape}, ) @@ -1626,8 +2026,8 @@ def Slice(prog, inputs, outputs, attrs, value_infos, *args, **kwargs): prog.VarDesc(var_output) prog.OpDesc( fluid_op, - ([var_data], 'Input'), - ([var_output], 'Out'), + (['Input'], [var_data]), + (['Out'], [var_output]), { 'axes': axes, 'starts': starts, @@ -1666,11 +2066,13 @@ def Split(prog, inputs, outputs, attrs, *args, name='', **kwargs): prog.VarDesc(var_out) prog.OpDesc( fluid_op, - (var_input, 'X'), - ([outputs], *(['Out'] * len(outputs))), + (['X'], [var_input]), + (['Out'] * len(outputs), outputs), { 'axis': axis, 'sections': split, + # unused + 'num': 0, }, ) @@ -1697,8 +2099,8 @@ def Sum(prog, inputs, outputs, *args, **kwargs): prog.VarDesc(var_sum) prog.OpDesc( fluid_op, - (inputs, *(['X'] * len(inputs))), - ([var_sum], 'Out'), + (['X'] * len(inputs), inputs), + (['Out'], [var_sum]), dict(), ) @@ -1709,12 +2111,12 @@ def Tile(prog, inputs, outputs, attrs, value_infos, name='', *args, **kwargs): """ # I/O - var_input, var_repeats = inputs + var_input, var_repeats, = inputs var_output, = outputs # interpretation repeats = _const_weight_or_none(value_infos, var_repeats) - assert repeats is not None, 'only const repeats is supported' + assert repeats is not None, 'only const repeats supported' fluid_op = 'expand' name_attr = ', name={}'.format(repr(name)) if name else '' @@ -1733,8 +2135,8 @@ def Tile(prog, inputs, outputs, attrs, value_infos, name='', *args, **kwargs): prog.VarDesc(var_output) prog.OpDesc( fluid_op, - ([var_input], 'X'), - ([var_output], 'Out'), + (['X'], [var_input]), + (['Out'], [var_output]), {'expand_times': repeats}, ) @@ -1770,8 +2172,8 @@ def Transpose(prog, inputs, outputs, attrs, *args, name='', **kwargs): prog.VarDesc(var_transposed) prog.OpDesc( fluid_op, - ([var_data], 'X'), - ([var_transposed, var_xshape], 'Out', 'XShape'), + (['X'], [var_data]), + (['Out', 'XShape'], [var_transposed, var_xshape]), {'axis': perm}, # f**k you API ) diff --git a/onnx2fluid/onnx2fluid/validation.py b/onnx2fluid/onnx2fluid/validation.py index 4374c7eb448fddafbf1514657545c30acefbeaa5..b397725d017db5c5c720621d53516fcc99aba5c7 100644 --- a/onnx2fluid/onnx2fluid/validation.py +++ b/onnx2fluid/onnx2fluid/validation.py @@ -159,7 +159,7 @@ def validate(fluid_model_filename, # output_names = output_data.keys() logger.info('with %d inputs and %d outputs', len(input_data), len(output_data)) - else: + elif save_inference_model: assert inference_input_names, 'input names required for type-shape inference' input_names = inference_input_names diff --git a/onnx2fluid/onnx2fluid/writer.py b/onnx2fluid/onnx2fluid/writer.py index 9d8ba4b7990b2bf0e9357afd3a1583943ed64e6b..90ac8a9ca8bbc43ce390ffeaadc8fb451cb11a09 100644 --- a/onnx2fluid/onnx2fluid/writer.py +++ 
b/onnx2fluid/onnx2fluid/writer.py @@ -96,7 +96,7 @@ class Program(object): return Program.DTYPE_TO_FRAMEWORK_DTYPE[dtype] @staticmethod - def OpDescVars(vals, *keys): + def OpDescVars(keys, vals): """ make (OpDesc.Var)s """ @@ -150,13 +150,11 @@ class Program(object): else: raise ValueError('unsupported attribute {} = {}'.format( key, value)) - else: # WORKAROUND: shape of scalars is [] - raise ValueError('unsupported attribute {} = {}'.format( - key, value)) - - -# od_attr.type = framework_pb2.INTS -# logger.warning('using attribute %s = %s as INTS', key, value) + else: # WORKAROUND: [] not inferred + # raise ValueError('unsupported attribute {} = {}'.format(key, value)) + od_attr.type = framework_pb2.INTS + logger.warning('using attribute %s = %s as INTS', key, + value) else: raise ValueError('unsupported attribute {} = {}'.format( key, value)) @@ -187,8 +185,8 @@ class Program(object): def OpDesc(self, op_type, - input_val_keys=None, - output_val_keys=None, + input_key_vals=None, + output_key_vals=None, attrs=None): """ add OpDesc @@ -196,10 +194,10 @@ class Program(object): desc = framework_pb2.OpDesc() desc.type = op_type - if input_val_keys: - desc.inputs.extend(self.OpDescVars(*input_val_keys)) - if output_val_keys: - desc.outputs.extend(self.OpDescVars(*output_val_keys)) + if input_key_vals: + desc.inputs.extend(self.OpDescVars(*input_key_vals)) + if output_key_vals: + desc.outputs.extend(self.OpDescVars(*output_key_vals)) if attrs: desc.attrs.extend(self.OpDescAttrs(attrs)) self.op_descs.append(desc) @@ -388,8 +386,8 @@ class Writer(object): )) prog.OpDesc( 'feed', - (['feed'], 'X'), - ([name], 'Out'), + (['X'], ['feed']), + (['Out'], [name]), {'col': idx}, ) prog.VarDesc(name, value_info=value_info, remove_batch=remove_batch) @@ -406,8 +404,8 @@ class Writer(object): prog.OpDesc( 'fetch', - ([name], 'X'), - (['fetch'], 'Out'), + (['X'], [name]), + (['Out'], ['fetch']), {'col': idx}, ) # var is emitted over ops @@ -424,12 +422,16 @@ class Writer(object): return codes @staticmethod - def write_weight(weight, filename): + def write_weight(weight, filename, lod=None): """ write single weight in fluid desc """ assert isinstance(weight, np.ndarray), 'weight is not an ndarray' + assert lod is None or isinstance(lod, + list), 'lod should be None or list' + + lod = lod or [0] tensor_desc = framework_pb2.VarType.TensorDesc() tensor_desc.data_type = Program.Dtype(weight.dtype) @@ -437,7 +439,7 @@ class Writer(object): fp = open(filename, 'wb') np.array([0], dtype=np.int32).tofile(fp) # version - np.array([0], dtype=np.int64).tofile(fp) # LOD level + np.array(lod, dtype=np.int64).tofile(fp) # LOD level np.array([0], dtype=np.int32).tofile(fp) # tensor version np.array([tensor_desc.ByteSize()], dtype=np.int32).tofile(fp) fp.write(tensor_desc.SerializeToString())
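A note on how the `DEFAULT_OP_MAPPING` rows extended earlier in this patch (`HardSigmoid`, `Identity`, `Round`, `Sign`, `Mod`, `SpaceToDepth`, ...) are consumed: each row names the fluid op, its input/output parameter keys, and an ONNX-to-fluid attribute rename map, and `_default` turns it into a `layers.*` call plus an `OpDesc`. The following is a deliberately simplified, self-contained sketch of just the attribute renaming and code emission; the variable names are invented, and the real `_default` also handles default attrs, I/O permutations, and the emitted descs:

```python
# one row from DEFAULT_OP_MAPPING: fluid op, input keys, output keys, attr renames
row = ['hard_sigmoid', ['X'], ['Out'], dict(alpha='slope', beta='offset')]
onnx_attrs = {'alpha': 0.2, 'beta': 0.5}

fluid_op, input_keys, output_keys, attr_map = row
fluid_attrs = {attr_map.get(k, k): v
               for k, v in onnx_attrs.items()
               if attr_map.get(k, k) != ''}  # an empty rename marks attrs to drop
code = '{} = layers.{}({}{})'.format(
    'var_out', fluid_op, 'var_x',
    ''.join(', {}={!r}'.format(k, v) for k, v in sorted(fluid_attrs.items())))
print(code)  # var_out = layers.hard_sigmoid(var_x, offset=0.5, slope=0.2)
```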
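The new `GRU`/`LSTM` handlers work by hoisting the input-to-hidden projection out of the recurrence: the ONNX tensors are squeezed out of their direction and batch axes, `X · W^T (+ Wb)` is computed once with the emitted MatMul/Add, and that result together with the transposed recurrent weight is what the generated `dynamic_gru`/`dynamic_lstm` call consumes. A shape-only numpy sketch of the layout juggling for the GRU case (batch size fixed to 1, as the handler asserts; values are zeros because only shapes matter here):

```python
import numpy as np

seq_len, input_size, hidden = 8, 6, 5
x = np.zeros((seq_len, 1, input_size), dtype=np.float32)     # ONNX layout (T, N=1, I)
w = np.zeros((1, 3 * hidden, input_size), dtype=np.float32)  # (directions, 3H, I)
r = np.zeros((1, 3 * hidden, hidden), dtype=np.float32)      # (directions, 3H, H)
b = np.zeros((1, 6 * hidden), dtype=np.float32)              # [Wb ; Rb] concatenated

x0 = x.squeeze(1)                   # Squeeze axes=[1]: drop the batch axis
w0 = w.squeeze(0)                   # Squeeze axes=[0]: drop the direction axis
bi, bh = np.split(b.squeeze(0), 2)  # Split: input bias vs. recurrent bias
fc = x0 @ w0.T + bi                 # MatMul(transpose_y=1) + Add: shape (T, 3H)
r0t = r.squeeze(0).T                # Transpose perm=[1, 0]: fluid's (H, 3H) weight

# fc, r0t and bh become the Input / Weight / Bias of the emitted dynamic_gru,
# and its (T, H) output is unsqueezed back to ONNX's (T, directions, N, H) layout.
```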
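Finally, since this patch deletes the `examples/convert_data_npz_0.py` and `examples/convert_data_pb_0.py` helpers, here is a minimal sketch of the golden-data file they used to produce, i.e. the `.npz` passed via `-t`: two dicts keyed by graph input and output names (the names and shapes below are placeholders):

```python
import numpy as np
from collections import OrderedDict

inputs = OrderedDict([('x', np.random.rand(1, 3, 224, 224).astype(np.float32))])
outputs = OrderedDict([('y', np.random.rand(1, 1000).astype(np.float32))])
np.savez('sample_1.npz', inputs=inputs, outputs=outputs)  # same call the removed helpers made
```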