From b750e3e130f90e937c817bbf123c1b5e8b38770e Mon Sep 17 00:00:00 2001 From: zhaoting Date: Sat, 18 Jul 2020 14:20:29 +0800 Subject: [PATCH] fix gpu Split and Concat memory allocation bug --- .../kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h | 10 +++++----- .../kernel_compiler/gpu/arrays/split_gpu_kernel.h | 4 ++-- .../kernel_compiler/gpu/cuda_impl/concatv2_impl.cu | 10 +++++----- .../kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh | 2 +- .../kernel_compiler/gpu/cuda_impl/split_impl.cu | 10 +++++----- .../kernel_compiler/gpu/cuda_impl/split_impl.cuh | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index bae315d1c..3014477a4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -74,12 +74,12 @@ class ConcatV2GpuFwdKernel : public GpuKernel { inputs_host_ = std::make_unique(input_num_); len_axis_ = std::make_unique(input_num_); for (int i = 0; i < input_num_; i++) { - int input_size = 1; + size_t input_size = 1; auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { - input_size *= SizeToInt(input_shape[j]); + input_size *= input_shape[j]; } - input_size_list_.push_back(IntToSize(input_size * sizeof(T))); + input_size_list_.push_back(input_size * sizeof(T)); len_axis_[i] = SizeToInt(input_shape[axis_]); } workspace_size_list_.push_back(sizeof(T *) * input_num_); @@ -97,7 +97,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel { all_size_before_axis_ *= output_shape[i]; } } - output_size_list_.push_back(IntToSize(output_size_ * sizeof(T))); + output_size_list_.push_back(output_size_ * sizeof(T)); InitSizeLists(); return true; @@ -117,7 +117,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel { } int axis_; int input_num_; - int output_size_; + size_t output_size_; int all_size_before_axis_; int all_size_axis_; std::unique_ptr inputs_host_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h index b26c01ee1..b1593675b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h @@ -83,7 +83,7 @@ class SplitGpuFwdKernel : public GpuKernel { all_size_before_axis_ *= input_shape[i]; } } - input_size_list_.push_back(IntToSize(input_size_ * sizeof(T))); + input_size_list_.push_back(input_size_ * sizeof(T)); axis_step_ = input_shape[axis_] / output_num_; for (int i = 0; i < output_num_; i++) { @@ -138,7 +138,7 @@ class SplitGpuFwdKernel : public GpuKernel { } int axis_; int output_num_; - int input_size_; + size_t input_size_; int axis_step_; int all_size_before_axis_; int all_size_axis_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu index c3a77d186..4866d61dd 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu @@ -19,7 +19,7 @@ #include #include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" template -__global__ void Concat(const int size, const int input_num, +__global__ void Concat(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output) { for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { @@ -45,7 +45,7 @@ __global__ void Concat(const int size, const int input_num, } template -void ConcatKernel(const int size, const int input_num, +void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output, cudaStream_t cuda_stream) { @@ -55,15 +55,15 @@ void ConcatKernel(const int size, const int input_num, return; } -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, float** inputs, float* output, cudaStream_t cuda_stream); -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, int** inputs, int* output, cudaStream_t cuda_stream); -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, half** inputs, half* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh index 010e2977e..6e469e802 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh @@ -19,7 +19,7 @@ #include "runtime/device/gpu/cuda_common.h" template -void ConcatKernel(const int size, const int input_num, +void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu index a24229086..e892a3b47 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu @@ -19,7 +19,7 @@ #include #include "backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh" template -__global__ void Split(const int size, const int axis_step, const int all_size_before_axis, +__global__ void Split(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { int num = pos % all_size_before_axis / all_size_axis; @@ -32,19 +32,19 @@ __global__ void Split(const int size, const int axis_step, const int all_size_be } template -void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream) { Split<<>>(size, axis_step, all_size_before_axis, all_size_axis, input, outputs); return; } -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const float* input, float** outputs, cudaStream_t cuda_stream); -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const int* input, int** outputs, cudaStream_t cuda_stream); -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const half* input, half** outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh index 5306648da..b8abce290 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh @@ -19,6 +19,6 @@ #include "runtime/device/gpu/cuda_common.h" template -void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPLIT_H_ -- GitLab