diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc
index 1b7318c5114e62c009fa7c8e824ffbc6235a8973..69716e91658800467a787b0674dad0e262f8abe1 100644
--- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.cc
@@ -26,5 +26,8 @@
 MS_REG_GPU_KERNEL_ONE(
   TensorAdd, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
   TensorAddGpuFwdKernel, half)
+MS_REG_GPU_KERNEL_ONE(
+  TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  TensorAddGpuFwdKernel, int)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
index a203567aa80211b41c59952972e8afbcf6f31dee..4dfbf4c3d4305836415752aa435ac4b02f308fb5 100644
--- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
@@ -71,6 +71,9 @@ class TensorAddGpuFwdKernel : public GpuKernel {
   bool Init(const CNodePtr &kernel_node) {
     InitResource();
     cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
+    if (cudnn_data_type_ == CUDNN_DATA_INT32) {
+      cudnn_data_type_ = CUDNN_DATA_FLOAT;
+    }
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 2) {
       MS_LOG(ERROR) << "Input number is " << input_num << ", but cudnnAddTensor needs 2 inputs.";
diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h
index fd73f378d8738d31d082baa1d607a1caf25308b6..5c7153a172b2da0e6bc0be51584a709fc867a58c 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h
@@ -101,7 +101,7 @@ class BiasAddGradGpuKernel : public GpuKernel {
       cudnnSetTensorNdDescriptorEx(db_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, SizeToInt(cudnn_dims), db_dims.get()),
       "cudnnSetTensorNdDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetReduceTensorDescriptor(op_desc_, CUDNN_REDUCE_TENSOR_ADD, cudnn_data_type_, CUDNN_NOT_PROPAGATE_NAN,
+      cudnnSetReduceTensorDescriptor(op_desc_, CUDNN_REDUCE_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN,
                                      CUDNN_REDUCE_TENSOR_NO_INDICES, CUDNN_32BIT_INDICES),
       "cudnnSetReduceTensorDescriptor failed");
diff --git a/mindspore/train/amp.py b/mindspore/train/amp.py
index 66e08874b288d063c03e64b582e0b778038bea6f..917b4c3359d0bcc3591545980cb62c7145b8d9a2 100644
--- a/mindspore/train/amp.py
+++ b/mindspore/train/amp.py
@@ -151,7 +151,7 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', **kwargs):
         loss_scale = loss_scale_manager.get_loss_scale()
         update_cell = loss_scale_manager.get_update_cell()
         if update_cell is not None:
-            if not context.get_context("enable_ge"):
+            if not (context.get_context("enable_ge") or (context.get_context("device_target") == "GPU")):
                 raise ValueError("Only `loss_scale_manager=None` and "
                                  "`loss_scale_manager=FixedLossScaleManager(drop_overflow_update=False)`"
                                  "are supported in current version. If you use `O2` option, please"
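
Not part of the patch: a minimal usage sketch of what the new int32 TensorAdd registration enables on GPU, assuming the MindSpore Python API of this code base (P.TensorAdd, context.set_context); the expected output comment is illustrative only.

import numpy as np
from mindspore import Tensor, context
from mindspore.ops import operations as P

# With the int32 kernel registration above, TensorAdd accepts int32 inputs on GPU;
# per the Init() change, the computation goes through cuDNN's float path internally.
context.set_context(device_target="GPU")
add = P.TensorAdd()
x = Tensor(np.array([1, 2, 3], dtype=np.int32))
y = Tensor(np.array([4, 5, 6], dtype=np.int32))
print(add(x, y))  # expected: [5 7 9]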