From ae9170972ccf0dba5c337768d6d84f927cf8c8c8 Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Tue, 12 Jul 2022 17:34:27 +0800 Subject: [PATCH] Fix ci tests under eager mode (#1264) --- paddleslim/core/dygraph.py | 18 +- paddleslim/nas/ofa/layers.py | 290 ++++++++++++++++++++++++------- paddleslim/nas/ofa/layers_old.py | 258 ++++++++++++++++++--------- tests/dygraph/test_flops.py | 1 - tests/test_dy2prog.py | 2 +- tests/test_ofa.py | 2 + tests/test_ofa_v2.py | 29 ---- 7 files changed, 414 insertions(+), 186 deletions(-) diff --git a/paddleslim/core/dygraph.py b/paddleslim/core/dygraph.py index cd6f920c..a5f090ed 100644 --- a/paddleslim/core/dygraph.py +++ b/paddleslim/core/dygraph.py @@ -3,7 +3,8 @@ import paddle import collections import logging import numpy as np -from paddle.fluid.framework import _dygraph_tracer, dygraph_only, _dygraph_guard, program_guard +from paddle.fluid import core +from paddle.fluid.framework import _dygraph_tracer, dygraph_only, _dygraph_guard, program_guard, in_dygraph_mode from paddle.fluid.dygraph.base import program_desc_tracing_guard, _switch_declarative_mode_guard_ from paddle.fluid.dygraph.layers import Layer from paddle.fluid.framework import Block, ParamBase, Program, Variable @@ -108,8 +109,8 @@ def to_variables(inputs, is_static=False): """ Find and rename variables. Find np.ndarray and convert it to variable. """ - if isinstance(inputs, - (Variable, paddle.Tensor)) or isinstance(inputs, np.ndarray): + if isinstance(inputs, (Variable, paddle.Tensor)) or isinstance(inputs, + np.ndarray): if is_static: return _to_var(inputs) else: @@ -118,7 +119,7 @@ def to_variables(inputs, is_static=False): ret = {} for _key in inputs: ret[_key] = to_variables(inputs[_key], is_static) - return inputs + return ret elif isinstance(inputs, list): ret = [] for _value in inputs: @@ -140,7 +141,7 @@ def dygraph2program(layer, extract_inputs_fn = extract_inputs_fn if extract_inputs_fn is not None else extract_vars extract_outputs_fn = extract_outputs_fn if extract_outputs_fn is not None else extract_vars - if os.environ.get("FLAGS_enable_eager_mode") == "1": + if in_dygraph_mode(): return _dy2prog(layer, inputs, feed_prefix, fetch_prefix, tmp_prefix, extract_inputs_fn, extract_outputs_fn, dtypes) @@ -187,7 +188,6 @@ def _dy2prog(layer, Tracing program in Eager Mode. 
""" paddle.enable_static() - program = Program() # convert ParamBase into Parameter automatically by _switch_declarative_mode_guard_ with program_guard(program), _switch_declarative_mode_guard_(True): @@ -198,8 +198,10 @@ def _dy2prog(layer, inputs = _create_tensors(inputs, dtypes=dtypes, is_static=True) else: inputs = to_variables(inputs, is_static=True) - inputs = extract_inputs_fn(inputs) - outputs = layer(*inputs) + if isinstance(inputs, list): + outputs = layer(*inputs) + else: + outputs = layer(inputs) paddle.disable_static() diff --git a/paddleslim/nas/ofa/layers.py b/paddleslim/nas/ofa/layers.py index 5da53d39..96bec907 100644 --- a/paddleslim/nas/ofa/layers.py +++ b/paddleslim/nas/ofa/layers.py @@ -21,7 +21,9 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle.fluid.core as core from paddle import _C_ops -from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph, _non_static_mode +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper from ...common import get_logger from .utils.utils import compute_start_end, get_same_padding, convert_to_list @@ -209,7 +211,10 @@ class SuperConv2D(nn.Conv2D): def get_active_filter(self, in_nc, out_nc, kernel_size): start, end = compute_start_end(self._kernel_size[0], kernel_size) ### if NOT transform kernel, intercept a center filter with kernel_size from largest filter - filters = self.weight[:out_nc, :in_nc, start:end, start:end] + if self.weight.shape[0] <= out_nc and self.weight.shape[1] <= in_nc: + filters = self.weight + else: + filters = self.weight[:out_nc, :in_nc, start:end, start:end] if self.transform_kernel != False and kernel_size < self._kernel_size[ 0]: ### if transform kernel, then use matrix to transform @@ -304,7 +309,10 @@ class SuperConv2D(nn.Conv2D): ### if in_nc = groups, slice the shape of bias by weight_out_nc. 
if groups != in_nc: weight_out_nc = weight_out_nc * groups - bias = self.bias[:weight_out_nc] + if weight_out_nc >= self.bias.shape[0]: + bias = self.bias + else: + bias = self.bias[:weight_out_nc] else: bias = self.bias self.cur_config['prune_dim'] = list(weight.shape) @@ -884,10 +892,15 @@ class SuperLinear(nn.Linear): out_nc = int(channel) else: out_nc = self._out_features - - weight = self.weight[:in_nc, :out_nc] + if self.weight.shape[0] <= in_nc and self.weight.shape[1] <= out_nc: + weight = self.weight + else: + weight = self.weight[:in_nc, :out_nc] if self._bias_attr != False: - bias = self.bias[:out_nc] + if self.bias.shape[0] <= out_nc: + bias = self.bias + else: + bias = self.bias[:out_nc] else: bias = self.bias self.cur_config['prune_dim'] = list(weight.shape) @@ -944,10 +957,22 @@ class SuperBatchNorm2D(nn.BatchNorm2D): self._check_input_dim(input) feature_dim = int(input.shape[1]) - weight = self.weight[:feature_dim] - bias = self.bias[:feature_dim] - mean = self._mean[:feature_dim] - variance = self._variance[:feature_dim] + if self.weight.shape[0] <= feature_dim: + weight = self.weight + else: + weight = self.weight[:feature_dim] + if self.bias.shape[0] <= feature_dim: + bias = self.bias + else: + bias = self.bias[:feature_dim] + if self._mean.shape[0] <= feature_dim: + mean = self._mean + else: + mean = self._mean[:feature_dim] + if self._variance.shape[0] <= feature_dim: + variance = self._variance + else: + variance = self._variance[:feature_dim] mean_out = self._mean variance_out = self._variance @@ -965,51 +990,91 @@ class SuperBatchNorm2D(nn.BatchNorm2D): "use_mkldnn", False, "fuse_with_relu", False, "use_global_stats", self._use_global_stats, "trainable_statistics", trainable_statistics) - try: - from paddle import _C_ops - from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph - if in_dygraph_mode(): - if feature_dim != self._mean.shape[0]: - batch_norm_out = _C_ops.final_state_batch_norm( - input, weight, bias, mean, variance, mean_out_tmp, - variance_out_tmp, *attrs) - self._mean[:feature_dim].set_value(mean) - self._variance[:feature_dim].set_value(variance) - mean_out[:feature_dim].set_value(mean_out_tmp) - variance_out[:feature_dim].set_value(variance_out_tmp) - else: - batch_norm_out = _C_ops.final_state_batch_norm( - input, weight, bias, self._mean, self._variance, - mean_out, variance_out, *attrs) - elif _in_legacy_dygraph(): - if feature_dim != self._mean.shape[0]: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, mean, variance, None, mean_out_tmp, - variance_out_tmp, *attrs) - self._mean[:feature_dim].set_value(mean) - self._variance[:feature_dim].set_value(variance) - mean_out[:feature_dim].set_value(mean_out_tmp) - variance_out[:feature_dim].set_value(variance_out_tmp) - else: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, self._mean, self._variance, None, - mean_out, variance_out, *attrs) - except: + + if in_dygraph_mode(): if feature_dim != self._mean.shape[0]: - batch_norm_out = core.ops.batch_norm(input, weight, bias, mean, - variance, mean_out_tmp, - variance_out_tmp, *attrs) + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.final_state_batch_norm( + input, weight, bias, mean, variance, self._momentum, + self._epsilon, self._data_format, not self.training, + self._use_global_stats, trainable_statistics, False, False) self._mean[:feature_dim].set_value(mean) self._variance[:feature_dim].set_value(variance) mean_out[:feature_dim].set_value(mean_out_tmp) variance_out[:feature_dim].set_value(variance_out_tmp) + 
return batch_norm_out else: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, self._mean, self._variance, mean_out, - variance_out, *attrs) + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.final_state_batch_norm( + input, weight, bias, mean, variance, self._momentum, + self._epsilon, self._data_format, not self.training, + self._use_global_stats, trainable_statistics, False) + return batch_norm_out + + elif _in_legacy_dygraph(): + if feature_dim != self._mean.shape[0]: + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, weight, bias, mean, variance, None, mean_out_tmp, + variance_out_tmp, *attrs) + self._mean[:feature_dim].set_value(mean) + self._variance[:feature_dim].set_value(variance) + mean_out[:feature_dim].set_value(mean_out_tmp) + variance_out[:feature_dim].set_value(variance_out_tmp) + return batch_norm_out + else: + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, weight, bias, self._mean, self._variance, None, + mean_out, variance_out, *attrs) + return batch_norm_out + + check_variable_and_dtype(input, 'input', + ['float16', 'float32', 'float64'], 'BatchNorm') + + # for static need dict + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": not self.training, + "data_layout": self._data_format, + "use_mkldnn": False, + "fuse_with_relu": False, + "use_global_stats": self._use_global_stats, + "trainable_statistics": trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [weight], + "Bias": [bias], + "Mean": [mean], + "Variance": [variance] + } + helper = LayerObjectHelper('batch_norm') + + param_dtype = input.dtype if input.dtype != 'float16' else 'float32' + saved_mean = helper.create_variable_for_type_inference( + dtype=param_dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + dtype=param_dtype, stop_gradient=True) + batch_norm_out = helper.create_variable_for_type_inference(input.dtype) + + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean], + "VarianceOut": [variance], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance] + } + + if self.training or trainable_statistics: + # reserve_space is only used for training. 
+ reserve_space = helper.create_variable_for_type_inference( + dtype=input.dtype, stop_gradient=True) + outputs["ReserveSpace"] = [reserve_space] + + helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) self.cur_config = {'prune_dim': feature_dim} - return batch_norm_out[0] + return batch_norm_out class SuperSyncBatchNorm(nn.SyncBatchNorm): @@ -1045,19 +1110,65 @@ class SuperSyncBatchNorm(nn.SyncBatchNorm): "is_test", not self.training, "data_layout", self._data_format, "use_mkldnn", False, "fuse_with_relu", False, "use_global_stats", False, 'trainable_statistics', False) - if feature_dim != self._mean.shape[0]: - sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm( - input, weight, bias, mean, variance, mean_out_tmp, - variance_out_tmp, *attrs) - self._mean[:feature_dim].set_value(mean) - self._variance[:feature_dim].set_value(variance) - mean_out[:feature_dim].set_value(mean_out_tmp) - variance_out[:feature_dim].set_value(variance_out_tmp) - else: - sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm( - input, weight, bias, self._mean, self._variance, mean_out, - variance_out, *attrs) + if _non_static_mode(): + if feature_dim != self._mean.shape[0]: + sync_batch_norm_out, _, _, _, _, _ = _C_ops.sync_batch_norm( + input, weight, bias, self._mean, self._variance, mean_out, + variance_out, *attrs) + + self._mean[:feature_dim].set_value(mean) + self._variance[:feature_dim].set_value(variance) + mean_out[:feature_dim].set_value(mean_out_tmp) + variance_out[:feature_dim].set_value(variance_out_tmp) + else: + sync_batch_norm_out, _, _, _, _, _ = _C_ops.sync_batch_norm( + input, weight, bias, self._mean, self._variance, mean_out, + variance_out, *attrs) + + return sync_batch_norm_out + + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'SyncBatchNorm') + + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": not self.training, + "data_layout": self._data_format, + "use_mkldnn": False, + "fuse_with_relu": False, + "use_global_stats": False, + "trainable_statistics": False, + } + + inputs = { + "X": [input], + "Scale": [weight], + "Bias": [bias], + "Mean": [self._mean], + "Variance": [self._variance] + } + + helper = LayerObjectHelper('sync_batch_norm') + + saved_mean = helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + sync_batch_norm_out = helper.create_variable_for_type_inference( + self._dtype) + + outputs = { + "Y": [sync_batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance] + } + + helper.append_op( + type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) return sync_batch_norm_out @@ -1173,22 +1284,66 @@ class SuperLayerNorm(nn.LayerNorm): begin_norm_axis = input_ndim - normalized_ndim feature_dim = int(input.shape[-1]) if self._weight_attr != False: - weight = self.weight[:feature_dim] + if self.weight.shape[0] <= feature_dim: + weight = self.weight + else: + weight = self.weight[:feature_dim] else: weight = None if self._bias_attr != False: - bias = self.bias[:feature_dim] + if self.bias.shape[0] <= feature_dim: + bias = self.bias + else: + bias = self.bias[:feature_dim] else: bias = None self.cur_config = {'prune_dim': feature_dim} if in_dygraph_mode(): - out, _, _, = _C_ops.final_state_layer_norm( + out, _, _ = _C_ops.final_state_layer_norm( 
input, weight, bias, self._epsilon, begin_norm_axis, False) + elif _in_legacy_dygraph(): + out, _, _ = _C_ops.layer_norm(input, weight, bias, 'epsilon', + self._epsilon, 'begin_norm_axis', + begin_norm_axis) else: - out, _, _ = core.ops.layer_norm(input, weight, bias, 'epsilon', - self._epsilon, 'begin_norm_axis', - begin_norm_axis) + check_variable_and_dtype(input, 'input', ['float32', 'float64'], + 'LayerNorm') + + inputs = dict() + inputs['X'] = [input] + if weight: + inputs['Scale'] = [weight] + if bias: + inputs['Bias'] = [bias] + attrs = { + "epsilon": self._epsilon, + "begin_norm_axis": begin_norm_axis + } + + helper = LayerObjectHelper('layer_norm') + + dtype = input.dtype + mean_out = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True) + variance_out = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True) + layer_norm_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type="layer_norm", + inputs=inputs, + outputs={ + "Y": layer_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": self._epsilon, + "begin_norm_axis": begin_norm_axis + }) + return layer_norm_out + return out @@ -1274,7 +1429,10 @@ class SuperEmbedding(nn.Embedding): else: out_nc = self._embedding_dim - weight = self.weight[:, :out_nc] + if self.weight.shape[1] <= out_nc: + weight = self.weight + else: + weight = self.weight[:, :out_nc] self.cur_config = {'prune_dim': list(weight.shape)} return F.embedding( input, diff --git a/paddleslim/nas/ofa/layers_old.py b/paddleslim/nas/ofa/layers_old.py index 33d648c0..1a4cc61b 100644 --- a/paddleslim/nas/ofa/layers_old.py +++ b/paddleslim/nas/ofa/layers_old.py @@ -20,7 +20,10 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.dygraph_utils as dygraph_utils from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import _varbase_creator +from paddle.fluid.framework import _varbase_creator, in_dygraph_mode, _in_legacy_dygraph, _non_static_mode +from paddle import _C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper from paddle.fluid.dygraph.nn import InstanceNorm, Conv2D, Conv2DTranspose, BatchNorm from ...common import get_logger @@ -235,11 +238,13 @@ class SuperConv2D(fluid.dygraph.Conv2D): shape=[(_input_filter.shape[0] * _input_filter.shape[1]), -1]) _tmp_filter = _varbase_creator(dtype=_input_filter.dtype) - core.ops.matmul(_input_filter, - self.__getattr__('%dto%d_matrix' % - (src_ks, target_ks)), - _tmp_filter, 'transpose_X', False, - 'transpose_Y', False, "alpha", 1) + if _non_static_mode(): + _C_ops.matmul(_input_filter, + self.__getattr__('%dto%d_matrix' % + (src_ks, target_ks)), + _tmp_filter, 'transpose_X', False, + 'transpose_Y', False, "alpha", 1) + _tmp_filter = fluid.layers.reshape( _tmp_filter, shape=[ @@ -306,7 +311,12 @@ class SuperConv2D(fluid.dygraph.Conv2D): attrs = ('strides', self._stride, 'paddings', padding, 'dilations', self._dilation, 'groups', groups if groups else 1, 'use_cudnn', self._use_cudnn) - out = core.ops.conv2d(input, weight, *attrs) + if in_dygraph_mode(): + out = _C_ops.final_state_conv2d( + input, weight, self._stride, padding, "EXPLICIT", groups + if groups else 1, self._dilation, "NCHW", False, -1, False) + elif _in_legacy_dygraph(): + out = _C_ops.conv2d(input, weight, *attrs) elif self._l_type == 'depthwise_conv2d': attrs = ('strides', self._stride, 'paddings', padding, 
'dilations', self._dilation, 'groups', groups @@ -540,11 +550,13 @@ class SuperConv2DTranspose(fluid.dygraph.Conv2DTranspose): shape=[(_input_filter.shape[0] * _input_filter.shape[1]), -1]) _tmp_filter = _varbase_creator(dtype=_input_filter.dtype) - core.ops.matmul(_input_filter, - self.__getattr__('%dto%d_matrix' % - (src_ks, target_ks)), - _tmp_filter, 'transpose_X', False, - 'transpose_Y', False, "alpha", 1) + if _non_static_mode(): + _C_ops.matmul(_input_filter, + self.__getattr__('%dto%d_matrix' % + (src_ks, target_ks)), + _tmp_filter, 'transpose_X', False, + 'transpose_Y', False, "alpha", 1) + _tmp_filter = fluid.layers.reshape( _tmp_filter, shape=[ @@ -607,10 +619,13 @@ class SuperConv2DTranspose(fluid.dygraph.Conv2DTranspose): else: padding = self._padding - op = getattr(core.ops, self._op_type) - out = op(input, weight, 'output_size', self._output_size, 'strides', - self._stride, 'paddings', padding, 'dilations', self._dilation, - 'groups', groups, 'use_cudnn', self._use_cudnn) + if _non_static_mode(): + op = getattr(_C_ops, self._op_type) + out = op(input, weight, 'output_size', self._output_size, 'strides', + self._stride, 'paddings', padding, 'dilations', + self._dilation, 'groups', groups, 'use_cudnn', + self._use_cudnn) + pre_bias = out out_nc = int(pre_bias.shape[1]) if self.bias is not None: @@ -749,15 +764,33 @@ class SuperSeparableConv2D(fluid.dygraph.Layer): weight = self.conv[0].weight[:in_nc] ### conv1 if self.conv[0]._l_type == 'conv2d': - attrs = ('strides', self.conv[0]._stride, 'paddings', - self.conv[0]._padding, 'dilations', self.conv[0]._dilation, - 'groups', in_nc, 'use_cudnn', self.conv[0]._use_cudnn) - out = core.ops.conv2d(input, weight, *attrs) + if in_dygraph_mode(): + out = _C_ops.final_state_conv2d( + input, weight, self.conv[0]._stride, self.conv[0]._padding, + "EXPLICIT", in_nc, self.conv[0]._dilation, "NCHW", False, + -1, False) + + elif _in_legacy_dygraph(): + attrs = ('strides', self.conv[0]._stride, 'paddings', + self.conv[0]._padding, 'dilations', + self.conv[0]._dilation, 'groups', in_nc, 'use_cudnn', + self.conv[0]._use_cudnn) + out = _C_ops.conv2d(input, weight, *attrs) + elif self.conv[0]._l_type == 'depthwise_conv2d': - attrs = ('strides', self.conv[0]._stride, 'paddings', - self.conv[0]._padding, 'dilations', self.conv[0]._dilation, - 'groups', in_nc, 'use_cudnn', self.conv[0]._use_cudnn) - out = core.ops.depthwise_conv2d(input, weight, *attrs) + if in_dygraph_mode(): + out = _C_ops.final_state_depthwise_conv2d( + input, weight, self.conv[0]._stride, self.conv[0]._padding, + "EXPLICIT", in_nc, self.conv[0]._dilation, "NCHW", False, + -1, False, False, self.conv[0]._use_cudnn) + + elif _in_legacy_dygraph(): + attrs = ('strides', self.conv[0]._stride, 'paddings', + self.conv[0]._padding, 'dilations', + self.conv[0]._dilation, 'groups', in_nc, 'use_cudnn', + self.conv[0]._use_cudnn) + + out = _C_ops.depthwise_conv2d(input, weight, *attrs) else: raise ValueError("conv type error") @@ -776,11 +809,19 @@ class SuperSeparableConv2D(fluid.dygraph.Layer): weight = self.conv[2].weight[:out_nc, :in_nc, :, :] if self.conv[2]._l_type == 'conv2d': - attrs = ('strides', self.conv[2]._stride, 'paddings', - self.conv[2]._padding, 'dilations', self.conv[2]._dilation, - 'groups', self.conv[2]._groups if self.conv[2]._groups else - 1, 'use_cudnn', self.conv[2]._use_cudnn) - out = core.ops.conv2d(norm_out, weight, *attrs) + if in_dygraph_mode(): + out = _C_ops.final_state_conv2d( + input, weight, self.conv[2]._stride, self.conv[2]._padding, + "EXPLICIT", 
self.conv[2]._groups if self.conv[2]._groups + else 1, self.conv[2]._dilation, "NCHW", False, -1, False) + + elif _in_legacy_dygraph(): + attrs = ('strides', self.conv[2]._stride, 'paddings', + self.conv[2]._padding, 'dilations', + self.conv[2]._dilation, 'groups', self.conv[2]._groups + if self.conv[2]._groups else 1, 'use_cudnn', + self.conv[2]._use_cudnn) + out = _C_ops.conv2d(norm_out, weight, *attrs) elif self.conv[2]._l_type == 'depthwise_conv2d': attrs = ('strides', self.conv[2]._stride, 'paddings', self.conv[2]._padding, 'dilations', self.conv[2]._dilation, @@ -847,8 +888,10 @@ class SuperLinear(fluid.dygraph.Linear): use_bias = True pre_bias = _varbase_creator(dtype=input.dtype) - core.ops.matmul(input, weight, pre_bias, 'transpose_X', False, - 'transpose_Y', False, "alpha", 1) + if _non_static_mode(): + _C_ops.matmul(input, weight, pre_bias, 'transpose_X', False, + 'transpose_Y', False, "alpha", 1) + if self._bias_attr != False: pre_act = dygraph_utils._append_bias_in_dygraph( pre_bias, bias, axis=len(input.shape) - 1) @@ -903,51 +946,91 @@ class SuperBatchNorm(fluid.dygraph.BatchNorm): "use_mkldnn", False, "fuse_with_relu", self._fuse_with_relu, "use_global_stats", self._use_global_stats, 'trainable_statistics', self._trainable_statistics) - try: - from paddle import _C_ops - from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph - if in_dygraph_mode(): - if feature_dim != self._mean.shape[0]: - batch_norm_out = _C_ops.final_state_batch_norm( - input, weight, bias, mean, variance, mean_out_tmp, - variance_out_tmp, *attrs) - self._mean[:feature_dim] = mean - self._variance[:feature_dim] = variance - mean_out[:feature_dim] = mean_out_tmp - variance_out[:feature_dim] = variance_out_tmp - else: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, self._mean, self._variance, - mean_out, variance_out, *attrs) - elif _in_legacy_dygraph(): - if feature_dim != self._mean.shape[0]: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, mean, variance, None, mean_out_tmp, - variance_out_tmp, *attrs) - self._mean[:feature_dim].set_value(mean) - self._variance[:feature_dim].set_value(variance) - mean_out[:feature_dim].set_value(mean_out_tmp) - variance_out[:feature_dim].set_value(variance_out_tmp) - else: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, self._mean, self._variance, None, - mean_out, variance_out, *attrs) - except: + + if in_dygraph_mode(): if feature_dim != self._mean.shape[0]: - batch_norm_out = core.ops.batch_norm(input, weight, bias, mean, - variance, mean_out_tmp, - variance_out_tmp, *attrs) + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.final_state_batch_norm( + input, weight, bias, mean, variance, self._momentum, + self._epsilon, self._data_layout, not self.training, + self._use_global_stats, self._trainable_statistics, False) + self._mean[:feature_dim] = mean + self._variance[:feature_dim] = variance + mean_out[:feature_dim] = mean_out_tmp + variance_out[:feature_dim] = variance_out_tmp + else: + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.final_state_batch_norm( + input, weight, bias, mean, variance, self._momentum, + self._epsilon, self._data_layout, not self.training, + self._use_global_stats, self._trainable_statistics, False) + return batch_norm_out + + elif _in_legacy_dygraph(): + if feature_dim != self._mean.shape[0]: + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, weight, bias, mean, variance, None, mean_out_tmp, + variance_out_tmp, *attrs) self._mean[:feature_dim].set_value(mean) 
self._variance[:feature_dim].set_value(variance) mean_out[:feature_dim].set_value(mean_out_tmp) variance_out[:feature_dim].set_value(variance_out_tmp) else: - batch_norm_out = core.ops.batch_norm( - input, weight, bias, self._mean, self._variance, mean_out, - variance_out, *attrs) + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, weight, bias, self._mean, self._variance, None, + mean_out, variance_out, *attrs) + return batch_norm_out - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out[0], act=self._act) + else: + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm') + + # for static need dict + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": not self.training, + "data_layout": self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": False, + "use_global_stats": self._use_global_stats, + "trainable_statistics": self._trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [weight], + "Bias": [bias], + "Mean": [mean], + "Variance": [variance] + } + + helper = LayerObjectHelper('batch_norm') + + param_dtype = input.dtype if input.dtype != 'float16' else 'float32' + saved_mean = helper.create_variable_for_type_inference( + dtype=param_dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + dtype=param_dtype, stop_gradient=True) + batch_norm_out = helper.create_variable_for_type_inference( + input.dtype) + + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean], + "VarianceOut": [variance], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance] + } + + if self.training or self._trainable_statistics: + # reserve_space is only used for training. + reserve_space = helper.create_variable_for_type_inference( + dtype=input.dtype, stop_gradient=True) + outputs["ReserveSpace"] = [reserve_space] + + helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + + return batch_norm_out class SuperInstanceNorm(fluid.dygraph.InstanceNorm): @@ -973,9 +1056,14 @@ class SuperInstanceNorm(fluid.dygraph.InstanceNorm): scale = self.scale[:feature_dim] bias = self.bias[:feature_dim] - out, _, _ = core.ops.instance_norm(input, scale, bias, 'epsilon', - self._epsilon) - return out + if in_dygraph_mode(): + out = _C_ops.final_state_instance_norm(input, scale, bias, + self._epsilon) + return out + if _in_legacy_dygraph(): + out, _, _ = _C_ops.instance_norm(input, scale, bias, 'epsilon', + self._epsilon) + return out class SuperLayerNorm(fluid.dygraph.LayerNorm): @@ -1002,11 +1090,15 @@ class SuperLayerNorm(fluid.dygraph.LayerNorm): feature_dim = int(input.shape[-1]) weight = self.weight[:feature_dim] bias = self.bias[:feature_dim] - pre_act, _, _ = core.ops.layer_norm(input, weight, bias, 'epsilon', - self._epsilon, 'begin_norm_axis', - self._begin_norm_axis) - return dygraph_utils._append_activation_in_dygraph( - pre_act, act=self._act) + if in_dygraph_mode(): + pre_act, _, _, = _C_ops.final_state_layer_norm( + input, weight, bias, self._epsilon, self._begin_norm_axis, + False) + elif _in_legacy_dygraph(): + pre_act, _, _ = _C_ops.layer_norm(input, weight, bias, 'epsilon', + self._epsilon, 'begin_norm_axis', + self._begin_norm_axis) + return pre_act class SuperEmbedding(fluid.dygraph.Embedding): @@ -1039,7 +1131,11 @@ class SuperEmbedding(fluid.dygraph.Embedding): out_nc = self._size[-1] weight = self.weight[:, :out_nc] - return core.ops.lookup_table_v2( - weight, input, 'is_sparse', self._is_sparse, 'is_distributed', - 
self._is_distributed, 'remote_prefetch', self._remote_prefetch, - 'padding_idx', self._padding_idx) + if in_dygraph_mode(): + return _C_ops.final_state_embedding( + input, weight, self._padding_idx, self._is_sparse) + elif _in_legacy_dygraph(): + return _C_ops.lookup_table_v2( + weight, input, 'is_sparse', self._is_sparse, 'is_distributed', + self._is_distributed, 'remote_prefetch', self._remote_prefetch, + 'padding_idx', self._padding_idx) diff --git a/tests/dygraph/test_flops.py b/tests/dygraph/test_flops.py index 699d9526..baaedad7 100644 --- a/tests/dygraph/test_flops.py +++ b/tests/dygraph/test_flops.py @@ -65,7 +65,6 @@ class TestFLOPsCase2(unittest.TestCase): net = Net2() x = np.random.uniform(-1, 1, x_shape).astype('float32') y = np.random.uniform(-1, 1, y_shape).astype('float32') - inputs = [paddle.to_tensor(x), paddle.to_tensor(y)] FLOPs1 = flops(net, inputs, only_conv=False) shapes = [x_shape, y_shape] diff --git a/tests/test_dy2prog.py b/tests/test_dy2prog.py index 4ce2412f..aac931d7 100644 --- a/tests/test_dy2prog.py +++ b/tests/test_dy2prog.py @@ -1,6 +1,7 @@ import os import sys sys.path.append("../") +os.environ['FLAGS_enable_eager_mode'] = "1" import paddle import unittest from paddleslim.core import dygraph2program @@ -25,7 +26,6 @@ class Model(paddle.nn.Layer): class TestEagerDygraph2Program(unittest.TestCase): def setUp(self): - os.environ['FLAGS_enable_eager_mode'] = "1" self.prepare_inputs() self.prepare_layer() diff --git a/tests/test_ofa.py b/tests/test_ofa.py index 997793fc..74172938 100644 --- a/tests/test_ofa.py +++ b/tests/test_ofa.py @@ -323,6 +323,8 @@ class TestOFA(unittest.TestCase): for model_no in range(self.run_config.dynamic_batch_size[ idx]): output = ofa_model(self.data) + if (isinstance(output, tuple)): + output = output[0] loss = paddle.mean(output) if self.distill_config.mapping_layers != None: dis_loss = ofa_model.calc_distill_loss() diff --git a/tests/test_ofa_v2.py b/tests/test_ofa_v2.py index ae337505..4903aca6 100644 --- a/tests/test_ofa_v2.py +++ b/tests/test_ofa_v2.py @@ -297,34 +297,5 @@ class TestInputDict(unittest.TestCase): input_dtypes=['float32', 'float32']) -class TestInputDict(unittest.TestCase): - def setUp(self): - model = ModelInputDict() - - sp_net_config = supernet(expand_ratio=[0.5, 1.0]) - self.model = Convert(sp_net_config).convert(model) - self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32') - self.images2 = { - 'data': paddle.randn( - shape=[2, 12, 32, 32], dtype='float32') - } - default_run_config = {'skip_layers': ['conv1.0', 'conv2.0']} - self.run_config = RunConfig(**default_run_config) - - self.ofa_model = OFA(self.model, run_config=self.run_config) - self.ofa_model._clear_search_space(self.images, data=self.images2) - - def test_export(self): - - config = self.ofa_model._sample_config( - task="expand_ratio", sample_type="smallest") - self.ofa_model.export( - config, - input_shapes=[[1, 3, 32, 32], { - 'data': [1, 12, 32, 32] - }], - input_dtypes=['float32', 'float32']) - - if __name__ == '__main__': unittest.main() -- GitLab
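
Note on the recurring pattern in this patch: the changes above repeatedly replace direct core.ops.* calls with a dispatch on Paddle's execution mode, using the in_dygraph_mode, _in_legacy_dygraph and _C_ops symbols that the patched files themselves import, plus defensive parameter slicing so already-pruned weights are not sliced again. The snippet below is a minimal illustrative sketch of that pattern, not code taken from the patch; the helper names (slice_param, layer_norm_dispatch) are hypothetical, and it assumes a Paddle 2.3-era install where these internal symbols exist.

# Illustrative sketch only (not part of the patch). It mirrors the
# eager/legacy/static dispatch the patch applies to SuperLayerNorm,
# SuperBatchNorm2D, SuperEmbedding, etc., and the defensive slicing added
# to get_active_filter and the forward() methods. Helper names are made up.
from paddle import _C_ops
from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph


def slice_param(param, target_len):
    # Only slice when the stored parameter is larger than what the current
    # sub-network needs; otherwise reuse it as-is (mirrors the new
    # "if self.weight.shape[0] <= feature_dim" checks in the patch).
    if param.shape[0] <= target_len:
        return param
    return param[:target_len]


def layer_norm_dispatch(x, weight, bias, epsilon, begin_norm_axis):
    # Hypothetical helper showing the same branching as SuperLayerNorm.forward.
    if in_dygraph_mode():
        # New eager mode: final_state_* ops take positional attributes.
        out, _, _ = _C_ops.final_state_layer_norm(x, weight, bias, epsilon,
                                                  begin_norm_axis, False)
    elif _in_legacy_dygraph():
        # Legacy dygraph: attributes are passed as name/value pairs.
        out, _, _ = _C_ops.layer_norm(x, weight, bias, 'epsilon', epsilon,
                                      'begin_norm_axis', begin_norm_axis)
    else:
        # Static graph: the patch builds the op via LayerObjectHelper.append_op;
        # that path is omitted here to keep the sketch short.
        raise NotImplementedError('static-graph path omitted in this sketch')
    return out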