From d3167da8afc7ef9bbfa700250c5782e562f9f106 Mon Sep 17 00:00:00 2001 From: wilfChen Date: Sat, 25 Jul 2020 19:55:19 +0800 Subject: [PATCH] gpu support stridedslice --- .../gpu/arrays/slice_gpu_kernel.cc | 6 - .../gpu/arrays/slice_gpu_kernel.h | 11 +- .../gpu/arrays/slice_grad_gpu_kernel.cc | 6 - .../gpu/arrays/slice_grad_gpu_kernel.h | 11 +- .../gpu/arrays/strided_slice_gpu_kernel.cc | 28 ++ .../gpu/arrays/strided_slice_gpu_kernel.h | 190 +++++++++++++ .../arrays/strided_slice_grad_gpu_kernel.cc | 28 ++ .../arrays/strided_slice_grad_gpu_kernel.h | 191 +++++++++++++ .../gpu/cuda_impl/slice_impl.cu | 210 +++++++------- .../gpu/cuda_impl/slice_impl.cuh | 25 +- tests/st/ops/gpu/test_stridedslice_grad_op.py | 261 ++++++++++++++++-- tests/st/ops/gpu/test_stridedslice_op.py | 84 ++++-- 12 files changed, 864 insertions(+), 187 deletions(-) create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h mode change 100755 => 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu mode change 100755 => 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc index 4c9ff2b7f..37c1fc6d4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc @@ -24,11 +24,5 @@ MS_REG_GPU_KERNEL_ONE(Slice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutp SliceGpuFwdKernel, int) MS_REG_GPU_KERNEL_ONE(Slice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), SliceGpuFwdKernel, half) -MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - SliceGpuFwdKernel, float) -MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), - SliceGpuFwdKernel, half) -MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), - SliceGpuFwdKernel, int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h index 40e35c183..8fc129ea4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h @@ -41,14 +41,9 @@ class SliceGpuFwdKernel : public GpuKernel { } T *input = GetDeviceAddress(inputs, 0); T *output = GetDeviceAddress(outputs, 0); - if (is_strided_slice_) { - CalStridedSlice(output_size_ / sizeof(T), input, input_shape_, begin_, size_, strides_, output, - reinterpret_cast(stream_ptr)); - } else { - Slice4DKernel(begin_[0], begin_[1], begin_[2], begin_[3], size_[0], size_[1], size_[2], size_[3], input_shape_[0], - input_shape_[1], input_shape_[2], input_shape_[3], input, output, - reinterpret_cast(stream_ptr)); - } + Slice4DKernel(begin_[0], begin_[1], begin_[2], begin_[3], size_[0], size_[1], size_[2], size_[3], input_shape_[0], + input_shape_[1], input_shape_[2], input_shape_[3], input, output, + reinterpret_cast(stream_ptr)); return true; } bool Init(const CNodePtr &kernel_node) override { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc index 2eeb3acf7..da3353b82 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc @@ -29,11 +29,5 @@ MS_REG_GPU_KERNEL_ONE( SliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), SliceGradGpuKernel, half) -MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - SliceGradGpuKernel, float) -MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), - SliceGradGpuKernel, int) -MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), - SliceGradGpuKernel, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h index 1a40b2bf2..45566c9d6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h @@ -38,13 +38,8 @@ class SliceGradGpuKernel : public GpuKernel { T *dy = GetDeviceAddress(inputs, 0); T *dx = GetDeviceAddress(outputs, 0); FillDeviceArray(outputs[0]->size / sizeof(T), dx, 0.f, reinterpret_cast(stream_ptr)); - if (is_strided_slice_) { - CalStridedSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, strides_, dx, - reinterpret_cast(stream_ptr)); - } else { - CalSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, dx, - reinterpret_cast(stream_ptr)); - } + CalSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, dx, + reinterpret_cast(stream_ptr)); return true; } @@ -140,7 +135,7 @@ class SliceGradGpuKernel : public GpuKernel { size_t input_size_; size_t output_size_; size_t workspace_size_; -}; +}; // namespace kernel } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc new file mode 100644 index 000000000..5ecb9d2a5 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + StridedSliceGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + StridedSliceGpuKernel, half) +MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + StridedSliceGpuKernel, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h new file mode 100644 index 000000000..25e771c70 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h @@ -0,0 +1,190 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H + +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" + +namespace mindspore { +namespace kernel { +constexpr int MAX_DIMS = 4; +template +class StridedSliceGpuKernel : public GpuKernel { + public: + StridedSliceGpuKernel() : null_output_(false) {} + ~StridedSliceGpuKernel() override = default; + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + if (null_output_) { + return true; + } + + T *input = GetDeviceAddress(inputs, 0); + T *output = GetDeviceAddress(outputs, 0); + StridedSlice(input_shape_, begin_, strides_, output_shape_, input, output, + reinterpret_cast(stream_ptr)); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (input_shape_.size() > MAX_DIMS) { + MS_LOG(ERROR) << "StridedSlice support support dims less than " << input_shape_.size(); + return false; + } + + FillEmptyDims(kernel_node); + ParseMasks(kernel_node); + FillOutputDim(); + null_output_ = IsNullOutput(); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T)); + output_size_list_.push_back(output_shape_[0] * output_shape_[1] * output_shape_[2] * output_shape_[3] * sizeof(T)); + } + + private: + void FillEmptyDims(const CNodePtr &kernel_node) { + begin_ = GetAttr>(kernel_node, "begin"); + end_ = GetAttr>(kernel_node, "end"); + strides_ = GetAttr>(kernel_node, "strides"); + + for (size_t i = 0; i < MAX_DIMS; i++) { + if (i < begin_.size()) { + begin_[i] = + std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1)); + } else { + begin_.push_back(0); + } + + if (i < end_.size()) { + end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1); + } else { + end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1); + } + + if (i >= strides_.size()) { + strides_.push_back(1); + } + + if (i >= input_shape_.size()) { + input_shape_.push_back(1); + } + } + } + + void ParseMasks(const CNodePtr &kernel_node) { + auto begin_mask_int = GetAttr(kernel_node, "begin_mask"); + auto begin_mask = Dec2Bin(begin_mask_int); + for (size_t i = 0; i < begin_mask.size(); i++) { + if (begin_mask[i]) { + begin_[i] = 0; + } + } + + auto end_mask_int = GetAttr(kernel_node, "end_mask"); + auto end_mask = Dec2Bin(end_mask_int); + for (size_t j = 0; j < end_mask.size(); j++) { + if (end_mask[j]) { + end_[j] = input_shape_[j]; + } + } + + auto ellipsis_mask_int = GetAttr(kernel_node, "ellipsis_mask"); + auto ellipsis_mask = Dec2Bin(ellipsis_mask_int); + for (size_t k = 0; k < ellipsis_mask.size(); k++) { + if (ellipsis_mask[k]) { + begin_[k] = 0; + end_[k] = input_shape_[k]; + strides_[k] = 1; + } + } + + auto shrink_axis_mask_str = GetAttr(kernel_node, "shrink_axis_mask"); + auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str); + for (size_t l = 0; l < shrink_axis_mask.size(); l++) { + if (shrink_axis_mask[l]) { + end_[l] = end_[l] > begin_[l] ? begin_[l] + 1 : begin_[l] - 1; + strides_[l] = end_[l] > begin_[l] ? 1 : -1; + } + } + } + + std::vector Dec2Bin(const int &mask) { + auto mask_str = std::bitset(mask).to_string(); + int dim_idx = 0; + std::vector result = {false, false, false, false}; + for (int i = mask_str.size() - 1; i >= 0; i--) { + if (mask_str[i] == '1') { + result[dim_idx] = true; + } + dim_idx++; + } + return result; + } + + void FillOutputDim() { + for (int i = 0; i < MAX_DIMS; i++) { + if (begin_[i] <= end_[i] && strides_[i] > 0) { + output_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1); + } else if (begin_[i] > end_[i] && strides_[i] < 0) { + output_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1); + } else { + output_shape_.push_back(0); + } + } + } + + bool IsNullOutput() { + for (int i = 0; i < MAX_DIMS; i++) { + if (begin_[i] >= end_[i] && strides_[i] > 0) { + return true; + } + if (begin_[i] < end_[i] && strides_[i] < 0) { + return true; + } + } + return false; + } + + std::vector begin_; + std::vector end_; + std::vector strides_; + std::vector input_shape_; + std::vector output_shape_; + int null_output_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc new file mode 100644 index 000000000..bbcce07a0 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + StridedSliceGradGpuKernel, float) +MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + StridedSliceGradGpuKernel, half) +MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + StridedSliceGradGpuKernel, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h new file mode 100644 index 000000000..3e16235d3 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h @@ -0,0 +1,191 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H + +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" + +namespace mindspore { +namespace kernel { +constexpr int MAX_DIMS = 4; +template +class StridedSliceGradGpuKernel : public GpuKernel { + public: + StridedSliceGradGpuKernel() : null_output_(false) {} + ~StridedSliceGradGpuKernel() override = default; + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *dy = GetDeviceAddress(inputs, 0); + T *dx = GetDeviceAddress(outputs, 0); + + FillDeviceArray(outputs[0]->size / sizeof(T), dx, 0.f, reinterpret_cast(stream_ptr)); + if (null_output_) { + return true; + } + + StridedSliceGrad(output_shape_, begin_, strides_, input_shape_, dy, dx, reinterpret_cast(stream_ptr)); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + input_shape_ = GetAttr>(kernel_node, "shapex"); + if (input_shape_.size() > MAX_DIMS) { + MS_LOG(ERROR) << "StridedSliceGrad support support dims less than " << input_shape_.size(); + return false; + } + + FillEmptyDims(kernel_node); + ParseMasks(kernel_node); + FillOutputDim(); + null_output_ = IsNullOutput(); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(output_shape_[0] * output_shape_[1] * output_shape_[2] * output_shape_[3] * sizeof(T)); + output_size_list_.push_back(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T)); + } + + private: + void FillEmptyDims(const CNodePtr &kernel_node) { + begin_ = GetAttr>(kernel_node, "begin"); + end_ = GetAttr>(kernel_node, "end"); + strides_ = GetAttr>(kernel_node, "strides"); + + for (size_t i = 0; i < MAX_DIMS; i++) { + if (i < begin_.size()) { + begin_[i] = + std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1)); + } else { + begin_.push_back(0); + } + + if (i < end_.size()) { + end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1); + } else { + end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1); + } + + if (i >= strides_.size()) { + strides_.push_back(1); + } + + if (i >= input_shape_.size()) { + input_shape_.push_back(1); + } + } + } + + void ParseMasks(const CNodePtr &kernel_node) { + auto begin_mask_int = GetAttr(kernel_node, "begin_mask"); + auto begin_mask = Dec2Bin(begin_mask_int); + for (size_t i = 0; i < begin_mask.size(); i++) { + if (begin_mask[i]) { + begin_[i] = 0; + } + } + + auto end_mask_int = GetAttr(kernel_node, "end_mask"); + auto end_mask = Dec2Bin(end_mask_int); + for (size_t j = 0; j < end_mask.size(); j++) { + if (end_mask[j]) { + end_[j] = input_shape_[j]; + } + } + + auto ellipsis_mask_int = GetAttr(kernel_node, "ellipsis_mask"); + auto ellipsis_mask = Dec2Bin(ellipsis_mask_int); + for (size_t k = 0; k < ellipsis_mask.size(); k++) { + if (ellipsis_mask[k]) { + begin_[k] = 0; + end_[k] = input_shape_[k]; + strides_[k] = 1; + } + } + + auto shrink_axis_mask_str = GetAttr(kernel_node, "shrink_axis_mask"); + auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str); + for (size_t l = 0; l < shrink_axis_mask.size(); l++) { + if (shrink_axis_mask[l]) { + end_[l] = end_[l] > begin_[l] ? begin_[l] + 1 : begin_[l] - 1; + strides_[l] = end_[l] > begin_[l] ? 1 : -1; + } + } + } + + std::vector Dec2Bin(const int &mask) { + auto mask_str = std::bitset(mask).to_string(); + int dim_idx = 0; + std::vector result = {false, false, false, false}; + for (int i = mask_str.size() - 1; i >= 0; i--) { + if (mask_str[i] == '1') { + result[dim_idx] = true; + } + dim_idx++; + } + return result; + } + + void FillOutputDim() { + for (int i = 0; i < MAX_DIMS; i++) { + if (begin_[i] <= end_[i] && strides_[i] > 0) { + output_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1); + } else if (begin_[i] > end_[i] && strides_[i] < 0) { + output_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1); + } else { + output_shape_.push_back(0); + } + } + } + + bool IsNullOutput() { + for (int i = 0; i < MAX_DIMS; i++) { + if (begin_[i] >= end_[i] && strides_[i] > 0) { + return true; + } + if (begin_[i] < end_[i] && strides_[i] < 0) { + return true; + } + } + return false; + } + + std::vector begin_; + std::vector end_; + std::vector strides_; + std::vector input_shape_; + std::vector output_shape_; + int null_output_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu old mode 100755 new mode 100644 index dd4effc17..bd21a8008 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu @@ -21,48 +21,29 @@ #include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" template -__global__ void Slice4D(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, - const T *input, T *output) { +__global__ void Slice4D(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, + const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, + const T *input, T *output) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (l1 * l2 * l3 * l4); pos += blockDim.x * gridDim.x) { int i = pos / (l2 * l3 * l4) % l1; int j = pos / (l3 * l4) % l2; int k = pos / l4 % l3; int o = pos % l4; - int offset = (i + s1) * (d2 * d3 * d4) + - (j + s2) * (d3 * d4) + - (k + s3) * d4 + - (o + s4); + int offset = (i + s1) * (d2 * d3 * d4) + (j + s2) * (d3 * d4) + (k + s3) * d4 + (o + s4); output[pos] = input[offset]; } } template -__global__ void SliceGrad(const T* dy, int p, int start, int length, T* output) { +__global__ void SliceGrad(const T *dy, int p, int start, int length, T *output) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (length); pos += blockDim.x * gridDim.x) { output[start + pos] = dy[p + pos]; } return; } + template -__global__ void StridedSlice(const T* input, int p, int start, int begin, int stride, int ended, T* output) { - for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < std::ceil(static_cast(ended - begin) / stride); - pos += blockDim.x * gridDim.x) { - output[p + pos] = input[start + pos * stride]; - } - return; -} -template -__global__ void StridedSliceGrad(const T* dy, int p, int start, int begin, int stride, int ended, T* dx) { - for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < std::ceil(static_cast(ended - begin) / stride); - pos += blockDim.x * gridDim.x) { - dx[start + pos * stride] = dy[p + pos]; - } - return; -} -template -__global__ void FillArray(T* addr, const size_t len, const float value) { +__global__ void FillArray(T *addr, const size_t len, const float value) { T value_ = static_cast(value); for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < len; pos += blockDim.x * gridDim.x) { addr[pos] = value_; @@ -70,21 +51,20 @@ __global__ void FillArray(T* addr, const size_t len, const float value) { return; } template -void FillDeviceArray(const size_t input_size, T* addr, const float value, cudaStream_t cuda_stream) { +void FillDeviceArray(const size_t input_size, T *addr, const float value, cudaStream_t cuda_stream) { FillArray<<>>(addr, input_size, value); return; } template -void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, - const T *input, T *output, cudaStream_t stream) { - Slice4D<<>>(s1, s2, s3, s4, l1, l2, l3, l4, - d1, d2, d3, d4, input, output); +void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, const int l3, + const int l4, const int d1, const int d2, const int d3, const int d4, const T *input, T *output, + cudaStream_t stream) { + Slice4D<<>>(s1, s2, s3, s4, l1, l2, l3, l4, d1, d2, d3, d4, + input, output); } template -void CalSliceGrad(const size_t input_size, const T* dy, const std::vector in_shape, const std::vector begin, - const std::vector size, T* output, cudaStream_t cuda_stream) { +void CalSliceGrad(const size_t input_size, const T *dy, const std::vector in_shape, const std::vector begin, + const std::vector size, T *output, cudaStream_t cuda_stream) { int block = in_shape[1] * in_shape[2] * in_shape[3]; int map = in_shape[2] * in_shape[3]; int w = in_shape[3]; @@ -100,92 +80,100 @@ void CalSliceGrad(const size_t input_size, const T* dy, const std::vector i } } } + template -void CalStridedSlice(const size_t input_size, const T* input, const std::vector in_shape, - const std::vector begin, const std::vector end, const std::vector strides, - T* output, cudaStream_t cuda_stream) { - int block = in_shape[1] * in_shape[2] * in_shape[3]; - int map = in_shape[2] * in_shape[3]; - int w = in_shape[3]; - int ended = end[3]; - int p = 0; - int start = 0; - for (int i = begin[0]; i < ((end[0] > begin[0]) ? end[0] : (2 * begin[0] - end[0])); i += std::abs(strides[0])) { - for (int j = begin[1]; j < ((end[1] > begin[1]) ? end[1] : (2 * begin[1] - end[1])); j += std::abs(strides[1])) { - for (int k = begin[2]; k < ((end[2] > begin[2]) ? end[2] : (2 * begin[2] - end[2])); k += std::abs(strides[2])) { - start = (strides[0] > 0 ? i : 2 * begin[0] - i) * block + (strides[1] > 0 ? j : 2 * begin[1] - j) * map + - (strides[2] > 0 ? k : 2 * begin[2] - k) * w + begin[3]; - StridedSlice<<>>(input, p, start, begin[3], strides[3], - ended, output); - p = p + std::ceil(static_cast(end[3] - begin[3]) / strides[3]); - } - } +__global__ void StridedSliceKernel(const int b0, const int b1, const int b2, const int b3, const int s0, const int s1, + const int s2, const int s3, const int i0, const int i1, const int i2, const int i3, + const int o0, const int o1, const int o2, const int o3, const T *input_addr, + T *output_addr) { + int output_num = o0 * o1 * o2 * o3; + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < output_num; pos += blockDim.x * gridDim.x) { + int i = pos / (o1 * o2 * o3) % o0; + int j = pos / (o2 * o3) % o1; + int k = pos / o3 % o2; + int l = pos % o3; + + int input_idx = (i * s0 + b0) * i1 * i2 * i3 + (j * s1 + b1) * i2 * i3 + (k * s2 + b2) * i3 + (l * s3 + b3); + output_addr[pos] = input_addr[input_idx]; } } + template -void CalStridedSliceGrad(const size_t input_size, const T* dy, const std::vector in_shape, - const std::vector begin, const std::vector end, const std::vector strides, - T* dx, cudaStream_t cuda_stream) { - int block = in_shape[1] * in_shape[2] * in_shape[3]; - int map = in_shape[2] * in_shape[3]; - int w = in_shape[3]; - int ended = end[3]; - int p = 0; - int start = 0; - for (int i = begin[0]; i < ((end[0] > begin[0]) ? end[0] : (2 * begin[0] - end[0] + 1)); i += std::abs(strides[0])) { - for (int j = begin[1]; j < ((end[1] > begin[1]) ? end[1] : (2 * begin[1] - end[1] + 1)); - j += std::abs(strides[1])) { - for (int k = begin[2]; k < ((end[2] > begin[2]) ? end[2] : (2 * begin[2] - end[2] + 1)); - k += std::abs(strides[2])) { - start = (strides[0] > 0 ? i : 2 * begin[0] - i) * block + (strides[1] > 0 ? j : 2 * begin[1] - j) * map + - (strides[2] > 0 ? k : 2 * begin[2] - k) * w + begin[3]; - StridedSliceGrad<<>>(dy, p, start, begin[3], strides[3], - ended, dx); - p = p + std::ceil(static_cast(end[3] - begin[3]) / strides[3]); - } - } +void StridedSlice(const std::vector &input_shape, const std::vector &begin, + const std::vector &strides, const std::vector &output_shape, const T *input, T *output, + cudaStream_t cuda_stream) { + int size = output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]; + StridedSliceKernel<<>>( + begin[0], begin[1], begin[2], begin[3], strides[0], strides[1], strides[2], strides[3], input_shape[0], + input_shape[1], input_shape[2], input_shape[3], output_shape[0], output_shape[1], output_shape[2], output_shape[3], + input, output); +} + +template +__global__ void StridedSliceGradKernel(const int b0, const int b1, const int b2, const int b3, const int s0, + const int s1, const int s2, const int s3, const int i0, const int i1, + const int i2, const int i3, const int o0, const int o1, const int o2, + const int o3, const T *dy, T *dx) { + int output_num = o0 * o1 * o2 * o3; + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < output_num; pos += blockDim.x * gridDim.x) { + int i = pos / (o1 * o2 * o3) % o0; + int j = pos / (o2 * o3) % o1; + int k = pos / o3 % o2; + int l = pos % o3; + + int input_idx = (i * s0 + b0) * i1 * i2 * i3 + (j * s1 + b1) * i2 * i3 + (k * s2 + b2) * i3 + (l * s3 + b3); + dx[input_idx] = dy[pos]; } + return; +} + +template +void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, const std::vector &strides, + const std::vector &dx_shape, const T *dy, T *dx, cudaStream_t cuda_stream) { + int size = dy_shape[0] * dy_shape[1] * dy_shape[2] * dy_shape[3]; + StridedSliceGradKernel<<>>( + begin[0], begin[1], begin[2], begin[3], strides[0], strides[1], strides[2], strides[3], dx_shape[0], dx_shape[1], + dx_shape[2], dx_shape[3], dy_shape[0], dy_shape[1], dy_shape[2], dy_shape[3], dy, dx); } -template void FillDeviceArray(const size_t input_size, float* addr, const float value, cudaStream_t cuda_stream); -template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, +template void FillDeviceArray(const size_t input_size, float *addr, const float value, cudaStream_t cuda_stream); +template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, + const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, const float *input, float *output, cudaStream_t stream); -template void CalSliceGrad(const size_t input_size, const float* dy, const std::vector in_shape, - const std::vector begin, const std::vector size, float* output, +template void CalSliceGrad(const size_t input_size, const float *dy, const std::vector in_shape, + const std::vector begin, const std::vector size, float *output, cudaStream_t cuda_stream); -template void CalStridedSlice(const size_t input_size, const float* input, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, float* output, cudaStream_t cuda_stream); -template void CalStridedSliceGrad(const size_t input_size, const float* dy, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, float* dx, cudaStream_t cuda_stream); -template void FillDeviceArray(const size_t input_size, half* addr, const float value, cudaStream_t cuda_stream); -template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, +template void FillDeviceArray(const size_t input_size, half *addr, const float value, cudaStream_t cuda_stream); +template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, + const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, const half *input, half *output, cudaStream_t stream); -template void CalSliceGrad(const size_t input_size, const half* dy, const std::vector in_shape, - const std::vector begin, const std::vector size, half* output, +template void CalSliceGrad(const size_t input_size, const half *dy, const std::vector in_shape, + const std::vector begin, const std::vector size, half *output, cudaStream_t cuda_stream); -template void CalStridedSlice(const size_t input_size, const half* input, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, half* output, cudaStream_t cuda_stream); -template void CalStridedSliceGrad(const size_t input_size, const half* dy, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, half* dx, cudaStream_t cuda_stream); -template void FillDeviceArray(const size_t input_size, int* addr, const float value, cudaStream_t cuda_stream); -template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, +template void FillDeviceArray(const size_t input_size, int *addr, const float value, cudaStream_t cuda_stream); +template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, + const int l3, const int l4, const int d1, const int d2, const int d3, const int d4, const int *input, int *output, cudaStream_t stream); -template void CalSliceGrad(const size_t input_size, const int* dy, const std::vector in_shape, - const std::vector begin, const std::vector size, int* output, +template void CalSliceGrad(const size_t input_size, const int *dy, const std::vector in_shape, + const std::vector begin, const std::vector size, int *output, cudaStream_t cuda_stream); -template void CalStridedSlice(const size_t input_size, const int* input, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, int* output, cudaStream_t cuda_stream); -template void CalStridedSliceGrad(const size_t input_size, const int* dy, const std::vector in_shape, - const std::vector begin, const std::vector end, - const std::vector strides, int* dx, cudaStream_t cuda_stream); + +template void StridedSlice(const std::vector &input_shape, const std::vector &begin, + const std::vector &strides, const std::vector &output_shape, const float *input, + float *output, cudaStream_t cuda_stream); +template void StridedSlice(const std::vector &input_shape, const std::vector &begin, + const std::vector &strides, const std::vector &output_shape, const half *input, + half *output, cudaStream_t cuda_stream); +template void StridedSlice(const std::vector &input_shape, const std::vector &begin, + const std::vector &strides, const std::vector &output_shape, const int *input, + int *output, cudaStream_t cuda_stream); + +template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, + const std::vector &strides, const std::vector &dx_shape, const float *dy, + float *dx, cudaStream_t cuda_stream); +template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, + const std::vector &strides, const std::vector &dx_shape, const half *dy, + half *dx, cudaStream_t cuda_stream); +template void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, + const std::vector &strides, const std::vector &dx_shape, const int *dy, + int *dx, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh old mode 100755 new mode 100644 index e04f277c3..70b013174 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh @@ -21,23 +21,20 @@ #include #include "runtime/device/gpu/cuda_common.h" - template -void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, - const int l1, const int l2, const int l3, const int l4, - const int d1, const int d2, const int d3, const int d4, - const T *input, T *output, cudaStream_t stream); +void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, const int l3, + const int l4, const int d1, const int d2, const int d3, const int d4, const T *input, T *output, + cudaStream_t stream); template -void CalSliceGrad(const size_t input_size, const T* input, const std::vector in_shape, - const std::vector begin, const std::vector size, T* output, cudaStream_t cuda_stream); +void CalSliceGrad(const size_t input_size, const T *input, const std::vector in_shape, + const std::vector begin, const std::vector size, T *output, cudaStream_t cuda_stream); template -void CalStridedSlice(const size_t input_size, const T* input, const std::vector in_shape, - const std::vector begin, const std::vector end, const std::vector strides, - T* output, cudaStream_t cuda_stream); +void StridedSlice(const std::vector &input_shape, const std::vector &begin, + const std::vector &strides, const std::vector &output_shape, const T *input, T *output, + cudaStream_t cuda_stream); template -void CalStridedSliceGrad(const size_t input_size, const T* dy, const std::vector in_shape, - const std::vector begin, const std::vector end, const std::vector strides, - T* dx, cudaStream_t cuda_stream); +void StridedSliceGrad(const std::vector &dy_shape, const std::vector &begin, const std::vector &strides, + const std::vector &dx_shape, const T *dy, T *dx, cudaStream_t cuda_stream); template -void FillDeviceArray(const size_t input_size, T* addr, const float value, cudaStream_t cuda_stream); +void FillDeviceArray(const size_t input_size, T *addr, const float value, cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SLICEIMPL_H_ diff --git a/tests/st/ops/gpu/test_stridedslice_grad_op.py b/tests/st/ops/gpu/test_stridedslice_grad_op.py index 2ab813685..f13de813d 100644 --- a/tests/st/ops/gpu/test_stridedslice_grad_op.py +++ b/tests/st/ops/gpu/test_stridedslice_grad_op.py @@ -19,31 +19,258 @@ import pytest import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor -from mindspore.common.api import ms_function from mindspore.ops import operations as P -from mindspore.ops.operations import _grad_ops as G +from mindspore.ops import composite as C context.set_context(mode=context.GRAPH_MODE, device_target='GPU') -class StridedSliceGrad(nn.Cell): - def __init__(self): - super(StridedSliceGrad, self).__init__() - self.ssg = G.StridedSliceGrad() - self.shape = P.Shape() +class StridedSliceNet(nn.Cell): + def __init__(self, begin, end, stride, begin_mask=0, end_mask=0, ellipsis_mask=0): + super(StridedSliceNet, self).__init__() + self.begin = begin + self.end = end + self.strides = stride + self.slice = P.StridedSlice(begin_mask, end_mask, ellipsis_mask) - @ms_function - def construct(self, dy, x): - return self.ssg(dy, self.shape(x), (2, 0, 0), (3, 2, 3), (1, 1, 1)) + def construct(self, x): + return self.slice(x, self.begin, self.end, self.strides) +class GradData(nn.Cell): + def __init__(self, network): + super(GradData, self).__init__() + self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=False) + self.network = network + + def construct(self, x): + return self.grad(self.network)(x) @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_slice(): - x = Tensor(np.array([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 7, 8]]]).astype(np.float32)) - dy = Tensor(np.array([[[5., 1., 5.], [6., 1., 8.]]]).astype(np.float32)) - ssg = StridedSliceGrad() - output = ssg(dy, x) - expect = [[[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]], [[5, 1, 5], [6, 1, 8]]] - assert (output.asnumpy() == expect).all() +def test_strided_slice_grad(): + x = Tensor(np.arange(0, 2*3*4*5).reshape(2, 3, 4, 5).astype(np.float32)) + net = StridedSliceNet((1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1)) + dx = GradData(net)(x) + expect = np.array([[[[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]], + + + [[[0., 0., 1., 1., 0.], + [0., 0., 1., 1., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 1., 1., 0.], + [0., 0., 1., 1., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]]]) + assert np.allclose(dx[0].asnumpy(), expect) + + net = StridedSliceNet((1, 0, 0, 5), (2, 2, 2, 1), (1, 1, 1, -2)) + dx = GradData(net)(x) + expect = np.array([[[[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]], + + + [[[0., 0., 1., 0., 1.], + [0., 0., 1., 0., 1.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 1., 0., 1.], + [0., 0., 1., 0., 1.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]]]) + assert np.allclose(dx[0].asnumpy(), expect) + + + net = StridedSliceNet((1, 0, 0, -1), (2, 2, 2, 1), (1, 1, 1, -1)) + dx = GradData(net)(x) + expect = np.array([[[[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]], + + + [[[0., 0., 1., 1., 1.], + [0., 0., 1., 1., 1.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 1., 1., 1.], + [0., 0., 1., 1., 1.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]]]) + assert np.allclose(dx[0].asnumpy(), expect) + + # ME infer fault + # y = GradData()(x, (1, 0, -1, -2), (2, 2, 0, -5), (1, 1, -1, -2)) + # expect = np.array([[[[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]]], + + + # [[[0., 0., 0., 0., 0.], + # [0., 1., 0., 1., 0.], + # [0., 1., 0., 1., 0.], + # [0., 1., 0., 1., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 1., 0., 1., 0.], + # [0., 1., 0., 1., 0.], + # [0., 1., 0., 1., 0.]],begin_mask=0b1000, end_mask=0b0010, ellipsis_mask=0b0100 + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]]]]) + # assert np.allclose(y.asnumpy(), expect) + + # y = Grad(begin_mask=0b1000, end_mask=0b0010)(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1)) + # expect = np.array([[[[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]]], + + + # [[[0., 0., 1., 1., 0.], + # [0., 0., 1., 1., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 1., 1., 0.], + # [0., 0., 1., 1., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]], + + # [[0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.], + # [0., 0., 0., 0., 0.]]]]) + # assert np.allclose(y.asnumpy(), expect) + + + net = StridedSliceNet((1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1), + begin_mask=0b1000, end_mask=0b0010, ellipsis_mask=0b0100) + dx = GradData(net)(x) + expect = np.array([[[[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]], + + + [[[1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.]], + + [[1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.]], + + [[1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.], + [1., 1., 1., 1., 0.]]]]) + assert np.allclose(dx[0].asnumpy(), expect) + + x = Tensor(np.arange(0, 3*4*5).reshape(3, 4, 5).astype(np.float32)) + net = StridedSliceNet((1, 0, 0), (2, -3, 3), (1, 1, 3)) + dx = GradData(net)(x) + expect = np.array([[[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[1., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]], + + [[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.]]]) + assert np.allclose(dx[0].asnumpy(), expect) diff --git a/tests/st/ops/gpu/test_stridedslice_op.py b/tests/st/ops/gpu/test_stridedslice_op.py index d6e477693..642fee4b4 100644 --- a/tests/st/ops/gpu/test_stridedslice_op.py +++ b/tests/st/ops/gpu/test_stridedslice_op.py @@ -17,29 +17,79 @@ import numpy as np import pytest import mindspore.context as context -import mindspore.nn as nn from mindspore import Tensor from mindspore.ops import operations as P context.set_context(mode=context.GRAPH_MODE, device_target='GPU') -class StridedSlice(nn.Cell): - def __init__(self): - super(StridedSlice, self).__init__() - self.stridedslice = P.StridedSlice() - - def construct(self, x): - return self.stridedslice(x, (2, 0, 0), (3, 2, 3), (1, 1, 1)) - - @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_slice(): - x = Tensor(np.array([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 7, 8]]]).astype(np.int32)) - stridedslice = StridedSlice() - output = stridedslice(x) - expect = [[[5., 5., 5.], - [6., 7., 8.]]] - assert (output.asnumpy() == expect).all() +def test_stridedslice(): + x = Tensor(np.arange(0, 2*3*4*5).reshape(2, 3, 4, 5).astype(np.float32)) + y = P.StridedSlice()(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1)) + expect = np.array([[[[62, 63], + [67, 68]], + [[82, 83], + [87, 88]]]]) + assert np.allclose(y.asnumpy(), expect) + + y = P.StridedSlice()(x, (1, 0, 0, 5), (2, 2, 2, 1), (1, 1, 1, -2)) + expect = np.array([[[[64, 62], + [69, 67]], + [[84, 82], + [89, 87]]]]) + assert np.allclose(y.asnumpy(), expect) + + y = P.StridedSlice()(x, (1, 0, 0, -1), (2, 2, 2, 1), (1, 1, 1, -1)) + expect = np.array([[[[64, 63, 62], + [69, 68, 67]], + [[84, 83, 82], + [89, 88, 87]]]]) + assert np.allclose(y.asnumpy(), expect) + + # ME infer fault + # y = P.StridedSlice()(x, (1, 0, -1, -2), (2, 2, 0, -5), (1, 1, -1, -2)) + # expect = np.array([[[[78, 76], + # [73, 71], + # [68, 66]], + # [[98, 96], + # [93, 91], + # [88, 86]]]]) + # assert np.allclose(y.asnumpy(), expect) + + # y = P.StridedSlice(begin_mask=0b1000, end_mask=0b0010)(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1)) + # expect = np.array([[[[ 62, 63], + # [ 67, 68]], + # [[ 82, 83], + # [ 87, 88]], + # [[102, 103], + # [107, 108]]]]) + # assert np.allclose(y.asnumpy(), expect) + + op = P.StridedSlice(begin_mask=0b1000, end_mask=0b0010, ellipsis_mask=0b0100) + y = op(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1)) + expect = np.array([[[[60, 61, 62, 63], + [65, 66, 67, 68], + [70, 71, 72, 73], + [75, 76, 77, 78]], + [[80, 81, 82, 83], + [85, 86, 87, 88], + [90, 91, 92, 93], + [95, 96, 97, 98]], + [[100, 101, 102, 103], + [105, 106, 107, 108], + [110, 111, 112, 113], + [115, 116, 117, 118]]]]) + assert np.allclose(y.asnumpy(), expect) + + x = Tensor(np.arange(0, 3*4*5).reshape(3, 4, 5).astype(np.float32)) + y = P.StridedSlice()(x, (1, 0, 0), (2, -3, 3), (1, 1, 3)) + expect = np.array([[[20]]]) + assert np.allclose(y.asnumpy(), expect) + + x_np = np.arange(0, 4*5).reshape(4, 5).astype(np.float32) + y = Tensor(x_np)[:, ::-1] + expect = x_np[:, ::-1] + assert np.allclose(y.asnumpy(), expect) -- GitLab