From 666efc2336e41ae27ae810d2cf9a39c74f40e936 Mon Sep 17 00:00:00 2001 From: AshburnLee <1578034415@qq.com> Date: Wed, 3 Feb 2021 18:26:29 +0800 Subject: [PATCH] Call new cudnn batch norm API regardless of data type and data layout (#30157) --- paddle/fluid/operators/batch_norm_op.cu | 273 +++++++++--------- paddle/fluid/operators/inplace_abn_op.cc | 3 + python/paddle/fluid/dygraph/nn.py | 16 +- python/paddle/fluid/layers/nn.py | 28 +- .../tests/unittests/test_batch_norm_op.py | 12 +- .../unittests/test_sync_batch_norm_op.py | 4 +- python/paddle/nn/functional/norm.py | 7 +- 7 files changed, 156 insertions(+), 187 deletions(-) diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index 2d5b395ac6..ae9cf2838b 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -114,7 +114,7 @@ class BatchNormKernel << "CUDNN_BN_MIN_EPSILON instead."; } epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); -#if CUDNN_VERSION_MIN(7, 0, 0) +#if CUDNN_VERSION_MIN(7, 0, 1) if (FLAGS_cudnn_batchnorm_spatial_persistent) { mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; } else { @@ -122,7 +122,7 @@ class BatchNormKernel } #else mode_ = CUDNN_BATCHNORM_SPATIAL; -#endif +#endif // CUDNN_VERSION_MIN(7, 0, 1) VLOG(3) << "Setting descriptors."; std::vector dims; @@ -151,7 +151,10 @@ class BatchNormKernel auto handle = dev_ctx.cudnn_handle(); // Now, depending on whether we are running test or not, we have two paths. - if (test_mode || use_global_stats) { + // It is training mode when it's not reference AND not using pre-trained + // model. + bool training = !test_mode && !use_global_stats; + if (!training) { // only when test we use input to do computation. const auto *est_mean = ctx.Input("Mean"); const auto *est_var = ctx.Input("Variance"); @@ -234,72 +237,70 @@ class BatchNormKernel bool called = false; #if CUDNN_VERSION_MIN(7, 4, 1) - if (compute_format == DataLayout::kNHWC) { - called = true; - size_t workspace_size = 0; - size_t reserve_space_size = 0; - void *reserve_space_ptr = nullptr; - void *workspace_ptr = nullptr; - Tensor workspace_tensor; - // Create reserve space and workspace for batch norm. - // Create tensor for each batchnorm op, it will be used in the - // backward. Thus this tensor shouldn't be temp. - auto *reserve_space = ctx.Output("ReserveSpace"); - PADDLE_ENFORCE_NOT_NULL( - reserve_space, - platform::errors::NotFound( - "The argument ReserveSpace of batch_norm op is not found.")); - - // --------------- cudnn batchnorm workspace --------------- - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload:: - cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - /*handle=*/handle, - /*mode=*/mode_, - /*bnIps=*/CUDNN_BATCHNORM_OPS_BN, - /*xDesc=*/data_desc_, - /*zDesc=*/nullptr, - /*yDesc=*/data_desc_, - /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, - /*activationDesc=*/nullptr, - /*sizeInBytes=*/&workspace_size)); - - // -------------- cudnn batchnorm reserve space -------------- - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload:: - cudnnGetBatchNormalizationTrainingExReserveSpaceSize( - /*handle=*/handle, - /*mode=*/mode_, - /*bnOps=*/CUDNN_BATCHNORM_OPS_BN, - /*activationDesc=*/nullptr, - /*xDesc=*/data_desc_, - /*sizeInBytes=*/&reserve_space_size)); - - reserve_space_ptr = reserve_space->mutable_data( - ctx.GetPlace(), transformed_x.type(), reserve_space_size); - workspace_ptr = workspace_tensor.mutable_data( - ctx.GetPlace(), transformed_x.type(), workspace_size); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnBatchNormalizationForwardTrainingEx( - handle, mode_, CUDNN_BATCHNORM_OPS_BN, - CudnnDataType::kOne(), CudnnDataType::kZero(), - data_desc_, transformed_x.template data(), nullptr, - nullptr, data_desc_, transformed_y.template data(), - bn_param_desc_, scale->template data>(), - bias->template data>(), this_factor, - mean_out->template mutable_data>( - ctx.GetPlace()), - variance_out->template mutable_data>( - ctx.GetPlace()), - epsilon, - saved_mean->template mutable_data>( - ctx.GetPlace()), - saved_variance->template mutable_data>( - ctx.GetPlace()), - nullptr, workspace_ptr, workspace_size, reserve_space_ptr, - reserve_space_size)); - } -#endif + called = true; + size_t workspace_size = 0; + size_t reserve_space_size = 0; + void *reserve_space_ptr = nullptr; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + // Create reserve space and workspace for batch norm. + // Create tensor for each batchnorm op, it will be used in the + // backward. Thus this tensor shouldn't be temp. + auto *reserve_space = ctx.Output("ReserveSpace"); + PADDLE_ENFORCE_NOT_NULL( + reserve_space, + platform::errors::NotFound( + "The argument ReserveSpace of batch_norm op is not found.")); + + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnIps=*/CUDNN_BATCHNORM_OPS_BN, + /*xDesc=*/data_desc_, + /*zDesc=*/nullptr, + /*yDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/nullptr, + /*sizeInBytes=*/&workspace_size)); + + // -------------- cudnn batchnorm reserve space -------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/CUDNN_BATCHNORM_OPS_BN, + /*activationDesc=*/nullptr, + /*xDesc=*/data_desc_, + /*sizeInBytes=*/&reserve_space_size)); + + reserve_space_ptr = reserve_space->mutable_data( + ctx.GetPlace(), transformed_x.type(), reserve_space_size); + workspace_ptr = workspace_tensor.mutable_data( + ctx.GetPlace(), transformed_x.type(), workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationForwardTrainingEx( + handle, mode_, CUDNN_BATCHNORM_OPS_BN, CudnnDataType::kOne(), + CudnnDataType::kZero(), data_desc_, + transformed_x.template data(), nullptr, nullptr, data_desc_, + transformed_y.template data(), bn_param_desc_, + scale->template data>(), + bias->template data>(), this_factor, + mean_out->template mutable_data>( + ctx.GetPlace()), + variance_out->template mutable_data>( + ctx.GetPlace()), + epsilon, + saved_mean->template mutable_data>( + ctx.GetPlace()), + saved_variance->template mutable_data>( + ctx.GetPlace()), + nullptr, workspace_ptr, workspace_size, reserve_space_ptr, + reserve_space_size)); +#endif // CUDNN_VERSION_MIN(7, 4, 1) if (!called) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnBatchNormalizationForwardTraining( @@ -640,7 +641,7 @@ class BatchNormGradKernel << "CUDNN_BN_MIN_EPSILON instead."; } epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); -#if CUDNN_VERSION_MIN(7, 0, 0) +#if CUDNN_VERSION_MIN(7, 0, 1) if (FLAGS_cudnn_batchnorm_spatial_persistent) { mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; } else { @@ -648,7 +649,7 @@ class BatchNormGradKernel } #else mode_ = CUDNN_BATCHNORM_SPATIAL; -#endif +#endif // CUDNN_VERSION_MIN(7, 0, 1) PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( data_desc_, CudnnDataType::type, @@ -672,74 +673,73 @@ class BatchNormGradKernel num, transformed_x.data(), grid2, block, stream); } + // This branch calls CUDNN APIs if (d_scale && d_bias) { bool called = false; #if CUDNN_VERSION_MIN(7, 4, 1) - if (compute_format == DataLayout::kNHWC) { - called = true; - size_t workspace_size = 0; - void *workspace_ptr = nullptr; - Tensor workspace_tensor; - auto reserve_space_size = reserve_space->memory_size(); - // --------------- cudnn batchnorm workspace --------------- - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload:: - cudnnGetBatchNormalizationBackwardExWorkspaceSize( - /*handle=*/dev_ctx.cudnn_handle(), - /*mode=*/mode_, - /*bnIps=*/CUDNN_BATCHNORM_OPS_BN, - /*xDesc=*/data_desc_, - /*yDesc=*/data_desc_, - /*dyDesc=*/data_desc_, - /*dzDesc=*/nullptr, - /*dxDesc=*/data_desc_, - /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, - /*activationDesc=*/nullptr, - /*sizeInBytes=*/&workspace_size)); - - workspace_ptr = workspace_tensor.mutable_data( - ctx.GetPlace(), transformed_x.type(), workspace_size); - - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnBatchNormalizationBackwardEx( - /*handle=*/dev_ctx.cudnn_handle(), - /*mode=*/mode_, - /*bnOps=*/CUDNN_BATCHNORM_OPS_BN, - /*alphaDataDiff=*/CudnnDataType::kOne(), - /*betaDataDiff=*/CudnnDataType::kZero(), - /*alphaParamDiff=*/CudnnDataType::kOne(), - /*betaParamDiff=*/CudnnDataType::kZero(), - /*xDesc=*/data_desc_, - /*xData=*/transformed_x.template data(), - /*yDesc=*/nullptr, - /*yData=*/nullptr, - /*dyDesc=*/data_desc_, - /*dyData=*/transformed_d_y.template data(), - /*dzDesc=*/nullptr, - /*dzData=*/nullptr, - /*dxDesc=*/data_desc_, - /*dxData=*/transformed_d_x.template mutable_data( - ctx.GetPlace()), - /*dBnScaleBiasDesc=*/bn_param_desc_, - /*bnScaleData=*/scale->template data>(), - /*bnBiasData=*/nullptr, - /*dBnScaleData=*/d_scale - ->template mutable_data>( - ctx.GetPlace()), - /*dBnBiasData=*/d_bias - ->template mutable_data>( - ctx.GetPlace()), - /*epsilon=*/epsilon, - /*savedMean=*/saved_mean_data, - /*savedInvVariance=*/saved_var_data, - /*activationDesc=*/nullptr, - /*workspace=*/workspace_ptr, - /*workSpaceSizeInBytes=*/workspace_size, - /*reserveSpace=*/const_cast( - reserve_space->template data()), - /*reserveSpaceSizeInBytes=*/reserve_space_size)); - } -#endif + called = true; + size_t workspace_size = 0; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + auto reserve_space_size = reserve_space->memory_size(); + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationBackwardExWorkspaceSize( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnIps=*/CUDNN_BATCHNORM_OPS_BN, + /*xDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*dyDesc=*/data_desc_, + /*dzDesc=*/nullptr, + /*dxDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/nullptr, + /*sizeInBytes=*/&workspace_size)); + + workspace_ptr = workspace_tensor.mutable_data( + ctx.GetPlace(), transformed_x.type(), workspace_size); + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationBackwardEx( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/CUDNN_BATCHNORM_OPS_BN, + /*alphaDataDiff=*/CudnnDataType::kOne(), + /*betaDataDiff=*/CudnnDataType::kZero(), + /*alphaParamDiff=*/CudnnDataType::kOne(), + /*betaParamDiff=*/CudnnDataType::kZero(), + /*xDesc=*/data_desc_, + /*xData=*/transformed_x.template data(), + /*yDesc=*/nullptr, + /*yData=*/nullptr, + /*dyDesc=*/data_desc_, + /*dyData=*/transformed_d_y.template data(), + /*dzDesc=*/nullptr, + /*dzData=*/nullptr, + /*dxDesc=*/data_desc_, + /*dxData=*/transformed_d_x.template mutable_data( + ctx.GetPlace()), + /*dBnScaleBiasDesc=*/bn_param_desc_, + /*bnScaleData=*/scale->template data>(), + /*bnBiasData=*/nullptr, + /*dBnScaleData=*/d_scale + ->template mutable_data>( + ctx.GetPlace()), + /*dBnBiasData=*/d_bias + ->template mutable_data>( + ctx.GetPlace()), + /*epsilon=*/epsilon, + /*savedMean=*/saved_mean_data, + /*savedInvVariance=*/saved_var_data, + /*activationDesc=*/nullptr, + /*workspace=*/workspace_ptr, + /*workSpaceSizeInBytes=*/workspace_size, + /*reserveSpace=*/const_cast( + reserve_space->template data()), + /*reserveSpaceSizeInBytes=*/reserve_space_size)); +#endif // CUDNN_VERSION_MIN(7, 4, 1) if (!called) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnBatchNormalizationBackward( @@ -764,6 +764,7 @@ class BatchNormGradKernel ctx, &transformed_d_x, d_x); } } else { + // This branch call CUDA kernels if (compute_format == DataLayout::kNCHW) { if (d_x) { BNBackwardData<<< diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index c8589b0f22..652c071be6 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -178,6 +178,9 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker { op->SetInput("Bias", this->Input("Bias")); op->SetInput("SavedMean", this->Output("SavedMean")); op->SetInput("SavedVariance", this->Output("SavedVariance")); + if (this->HasOutput("ReserveSpace")) { + op->SetInput("ReserveSpace", this->Output("ReserveSpace")); + } // used when setting use_global_stats True during training if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) { diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 74ee233612..6decff69ad 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -1309,12 +1309,6 @@ class BatchNorm(layers.Layer): dtype=self._dtype) self._variance.stop_gradient = True - self._has_reserve_space = False - if data_layout == 'NHWC': - flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent') - if flag is not None and flag.lower() in ['true', '1']: - self._has_reserve_space = True - self._in_place = in_place self._data_layout = data_layout self._momentum = momentum @@ -1341,7 +1335,6 @@ class BatchNorm(layers.Layer): batch_norm_out, _, _, _, _, _ = core.ops.batch_norm( input, self.weight, self.bias, self._mean, self._variance, mean_out, variance_out, *attrs) - return dygraph_utils._append_activation_in_dygraph( batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn) @@ -1371,11 +1364,8 @@ class BatchNorm(layers.Layer): dtype=self._dtype, stop_gradient=True) saved_variance = self._helper.create_variable_for_type_inference( dtype=self._dtype, stop_gradient=True) - - reserve_space = None - if self._has_reserve_space: - reserve_space = self._helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.FP16, stop_gradient=True) + reserve_space = self._helper.create_variable_for_type_inference( + dtype=self._helper.input_dtype(input), stop_gradient=True) batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference( self._dtype) @@ -1388,7 +1378,7 @@ class BatchNorm(layers.Layer): "SavedVariance": [saved_variance] } if reserve_space is not None: - outputs["ReserveSpace"] = reserve_space + outputs["ReserveSpace"] = [reserve_space] self._helper.append_op( type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 8f3e88a67c..8d96e46f83 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2792,12 +2792,6 @@ def batch_norm(input, 'batch_norm') dtype = helper.input_dtype() - has_reserve_space = False - if data_layout == 'NHWC': - flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent') - if flag is not None and flag.lower() in ['true', '1']: - has_reserve_space = True - # use fp32 for bn parameter if dtype == core.VarDesc.VarType.FP16: dtype = core.VarDesc.VarType.FP32 @@ -2845,17 +2839,16 @@ def batch_norm(input, # create output # mean and mean_out share the same memory mean_out = mean - # variance and variance out share the same memory + # variance and variance_out share the same memory variance_out = variance saved_mean = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) - reserve_space = None - if has_reserve_space: + if not is_test: reserve_space = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.FP16, stop_gradient=True) + dtype=helper.input_dtype(), stop_gradient=True) batch_norm_out = input if in_place else \ helper.create_variable_for_type_inference(dtype) @@ -2998,12 +2991,6 @@ def inplace_abn(input, 'inplace_abn') dtype = helper.input_dtype() - has_reserve_space = False - if data_layout == 'NHWC': - flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent') - if flag is not None and flag.lower() in ['true', '1']: - has_reserve_space = True - input_shape = input.shape if data_layout == 'NCHW': channel_num = input_shape[1] @@ -3053,12 +3040,8 @@ def inplace_abn(input, dtype=dtype, stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) - - reserve_space = None - if has_reserve_space: - reserve_space = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.FP16, stop_gradient=True) - + reserve_space = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True) batch_norm_out = input inputs = { @@ -3082,7 +3065,6 @@ def inplace_abn(input, inputs['MomemtumTensor'] = momentum else: attrs['momentum'] = momentum - outputs = { "Y": batch_norm_out, "MeanOut": mean_out, diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 14a30d15ae..2eb334d095 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -440,16 +440,8 @@ class TestBatchNormOpTraining(unittest.TestCase): "SavedMean": block.var('saved_mean'), "SavedVariance": block.var('saved_variance') } - has_reserve_space = False - if data_format == 'NHWC': - flag = os.environ.get( - 'FLAGS_cudnn_batchnorm_spatial_persistent') - if flag is not None and flag.lower() in ['true', '1']: - has_reserve_space = True - if has_reserve_space: - block.create_var(name="reserve_space", dtype='float16') - outputs["ReserveSpace"] = block.var('reserve_space') - del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] + block.create_var(name="reserve_space", dtype='float32') + outputs["ReserveSpace"] = block.var('reserve_space') bn_op = block.append_op( type="batch_norm", inputs=inputs, diff --git a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py index baac0af5d6..4649323b5b 100644 --- a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py @@ -122,7 +122,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): if not only_forward: others = [ 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD', - 'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD' + 'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD' ] fetch_names += others bn_fetches = exe.run(program=main, @@ -142,7 +142,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): if not only_forward: others = [ 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD', - 'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD' + 'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD' ] fetch_names += others for nm in fetch_names: diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index fcda579332..050b9bce61 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -166,7 +166,6 @@ def batch_norm(x, batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b) print(batch_norm_out) """ - assert len(x.shape) >= 2, "input dim must be larger than 1" # input ad out must share the memory @@ -196,7 +195,6 @@ def batch_norm(x, batch_norm_out, _, _, _, _, _ = core.ops.batch_norm( x, weight, bias, running_mean, running_var, mean_out, variance_out, *attrs) - return dygraph_utils._append_activation_in_dygraph( batch_norm_out, act=None) @@ -230,13 +228,16 @@ def batch_norm(x, saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) batch_norm_out = helper.create_variable_for_type_inference(dtype) + reserve_space = helper.create_variable_for_type_inference( + dtype=x.dtype, stop_gradient=True) outputs = { "Y": [batch_norm_out], "MeanOut": [running_mean], "VarianceOut": [running_var], "SavedMean": [saved_mean], - "SavedVariance": [saved_variance] + "SavedVariance": [saved_variance], + "ReserveSpace": [reserve_space] } helper.append_op( -- GitLab