diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index bae315d1c141919d4d4ce3c705a78db3d7a71caf..3014477a4820a3381079ab0a84204d5562b7198e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -74,12 +74,12 @@ class ConcatV2GpuFwdKernel : public GpuKernel { inputs_host_ = std::make_unique(input_num_); len_axis_ = std::make_unique(input_num_); for (int i = 0; i < input_num_; i++) { - int input_size = 1; + size_t input_size = 1; auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { - input_size *= SizeToInt(input_shape[j]); + input_size *= input_shape[j]; } - input_size_list_.push_back(IntToSize(input_size * sizeof(T))); + input_size_list_.push_back(input_size * sizeof(T)); len_axis_[i] = SizeToInt(input_shape[axis_]); } workspace_size_list_.push_back(sizeof(T *) * input_num_); @@ -97,7 +97,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel { all_size_before_axis_ *= output_shape[i]; } } - output_size_list_.push_back(IntToSize(output_size_ * sizeof(T))); + output_size_list_.push_back(output_size_ * sizeof(T)); InitSizeLists(); return true; @@ -117,7 +117,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel { } int axis_; int input_num_; - int output_size_; + size_t output_size_; int all_size_before_axis_; int all_size_axis_; std::unique_ptr inputs_host_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h index b26c01ee106135976420ed0b10f1ce2c2842a24f..b1593675ba249b36806ae83967cb089c573466c2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h @@ -83,7 +83,7 @@ class SplitGpuFwdKernel : public GpuKernel { all_size_before_axis_ *= input_shape[i]; } } - input_size_list_.push_back(IntToSize(input_size_ * sizeof(T))); + input_size_list_.push_back(input_size_ * sizeof(T)); axis_step_ = input_shape[axis_] / output_num_; for (int i = 0; i < output_num_; i++) { @@ -138,7 +138,7 @@ class SplitGpuFwdKernel : public GpuKernel { } int axis_; int output_num_; - int input_size_; + size_t input_size_; int axis_step_; int all_size_before_axis_; int all_size_axis_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu index c3a77d186d70d960bdce2e1015fe3f7888a4432f..4866d61dd99954863bd0221c09228c94cdb19ec5 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu @@ -19,7 +19,7 @@ #include #include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" template -__global__ void Concat(const int size, const int input_num, +__global__ void Concat(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output) { for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { @@ -45,7 +45,7 @@ __global__ void Concat(const int size, const int input_num, } template -void ConcatKernel(const int size, const int input_num, +void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output, cudaStream_t cuda_stream) { @@ -55,15 +55,15 @@ void ConcatKernel(const int size, const int input_num, return; } -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, float** inputs, float* output, cudaStream_t cuda_stream); -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, int** inputs, int* output, cudaStream_t cuda_stream); -template void ConcatKernel(const int size, const int input_num, +template void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, half** inputs, half* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh index 010e2977e2366fb51fd653150d825c42323cad45..6e469e8028efa93285d0b2d1e6e976cd3d7f3637 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh @@ -19,7 +19,7 @@ #include "runtime/device/gpu/cuda_common.h" template -void ConcatKernel(const int size, const int input_num, +void ConcatKernel(const size_t size, const int input_num, const int all_size_before_axis, const int all_size_axis, int* len_axis, T** inputs, T* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu index a2422908601f114dcfaabe148f421717038f9e32..e892a3b47dcff4f1763b55456dd994aa4443706f 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu @@ -19,7 +19,7 @@ #include #include "backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh" template -__global__ void Split(const int size, const int axis_step, const int all_size_before_axis, +__global__ void Split(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) { int num = pos % all_size_before_axis / all_size_axis; @@ -32,19 +32,19 @@ __global__ void Split(const int size, const int axis_step, const int all_size_be } template -void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream) { Split<<>>(size, axis_step, all_size_before_axis, all_size_axis, input, outputs); return; } -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const float* input, float** outputs, cudaStream_t cuda_stream); -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const int* input, int** outputs, cudaStream_t cuda_stream); -template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const half* input, half** outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh index 5306648da89e1a74ccf99b161c7663f899537f26..b8abce290d4034eaffc13bf6771343d14cf230cc 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh @@ -19,6 +19,6 @@ #include "runtime/device/gpu/cuda_common.h" template -void SplitKernel(const int size, const int axis_step, const int all_size_before_axis, +void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis, const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPLIT_H_