magicwindyyd / mindspore (forked from MindSpore / mindspore, in sync with the fork source)
Commit d3167da8, authored on July 25, 2020 by wilfChen

    gpu support stridedslice

Parent: 0fac402a
Showing 12 changed files with 864 additions and 187 deletions (+864, -187).
Changed files:

    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc              +0    -6
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h               +3    -8
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc         +0    -6
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h          +3    -8
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc      +28   -0    (new)
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h       +190  -0    (new)
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc +28   -0    (new)
    mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h  +191  -0    (new)
    mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu                 +99   -111
    mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh                +11   -14
    tests/st/ops/gpu/test_stridedslice_grad_op.py                                       +244  -17
    tests/st/ops/gpu/test_stridedslice_op.py                                            +67   -17
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc

The StridedSlice registrations previously served by `SliceGpuFwdKernel` are removed (they move to the new strided_slice_gpu_kernel.cc):

```diff
@@ -24,11 +24,5 @@ MS_REG_GPU_KERNEL_ONE(Slice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutp
                       SliceGpuFwdKernel, int)
 MS_REG_GPU_KERNEL_ONE(Slice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                       SliceGpuFwdKernel, half)
-MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                      SliceGpuFwdKernel, float)
-MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-                      SliceGpuFwdKernel, half)
-MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-                      SliceGpuFwdKernel, int)
 }  // namespace kernel
 }  // namespace mindspore
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h

With strided slicing gone from this kernel, the `is_strided_slice_` branch in `Launch` is removed:

```diff
@@ -41,14 +41,9 @@ class SliceGpuFwdKernel : public GpuKernel {
     }
     T *input = GetDeviceAddress<T>(inputs, 0);
     T *output = GetDeviceAddress<T>(outputs, 0);
-    if (is_strided_slice_) {
-      CalStridedSlice(output_size_ / sizeof(T), input, input_shape_, begin_, size_, strides_, output,
-                      reinterpret_cast<cudaStream_t>(stream_ptr));
-    } else {
-      Slice4DKernel(begin_[0], begin_[1], begin_[2], begin_[3], size_[0], size_[1], size_[2], size_[3],
-                    input_shape_[0], input_shape_[1], input_shape_[2], input_shape_[3], input, output,
-                    reinterpret_cast<cudaStream_t>(stream_ptr));
-    }
+    Slice4DKernel(begin_[0], begin_[1], begin_[2], begin_[3], size_[0], size_[1], size_[2], size_[3], input_shape_[0],
+                  input_shape_[1], input_shape_[2], input_shape_[3], input, output,
+                  reinterpret_cast<cudaStream_t>(stream_ptr));
     return true;
   }
   bool Init(const CNodePtr &kernel_node) override {
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc

Likewise, the StridedSliceGrad registrations move out of `SliceGradGpuKernel`:

```diff
@@ -29,11 +29,5 @@ MS_REG_GPU_KERNEL_ONE(
   SliceGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
   SliceGradGpuKernel, half)
-MS_REG_GPU_KERNEL_ONE(StridedSliceGrad,
-                      KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                      SliceGradGpuKernel, float)
-MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-                      SliceGradGpuKernel, int)
-MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-                      SliceGradGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h

The `is_strided_slice_` branch is removed from the grad kernel's `Launch` as well:

```diff
@@ -38,13 +38,8 @@ class SliceGradGpuKernel : public GpuKernel {
     T *dy = GetDeviceAddress<T>(inputs, 0);
     T *dx = GetDeviceAddress<T>(outputs, 0);
     FillDeviceArray(outputs[0]->size / sizeof(T), dx, 0.f, reinterpret_cast<cudaStream_t>(stream_ptr));
-    if (is_strided_slice_) {
-      CalStridedSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, strides_, dx,
-                          reinterpret_cast<cudaStream_t>(stream_ptr));
-    } else {
-      CalSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, dx,
-                   reinterpret_cast<cudaStream_t>(stream_ptr));
-    }
+    CalSliceGrad(output_size_ / sizeof(T), dy, input_shape_, begin_, size_, dx,
+                 reinterpret_cast<cudaStream_t>(stream_ptr));
     return true;
   }
```

A second, one-line hunk touches the trailing declarations; its visible context is:

```cpp
@@ -140,7 +135,7 @@ class SliceGradGpuKernel : public GpuKernel {
  size_t input_size_;
  size_t output_size_;
  size_t workspace_size_;
};
}  // namespace kernel
}  // namespace mindspore
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.cc (new file, mode 100644)

```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      StridedSliceGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                      StridedSliceGpuKernel, half)
MS_REG_GPU_KERNEL_ONE(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                      StridedSliceGpuKernel, int)
}  // namespace kernel
}  // namespace mindspore
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h (new file, mode 100644)

```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H

#include <vector>
#include <bitset>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"

namespace mindspore {
namespace kernel {
constexpr int MAX_DIMS = 4;
template <typename T>
class StridedSliceGpuKernel : public GpuKernel {
 public:
  StridedSliceGpuKernel() : null_output_(false) {}
  ~StridedSliceGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    if (null_output_) {
      return true;
    }
    T *input = GetDeviceAddress<T>(inputs, 0);
    T *output = GetDeviceAddress<T>(outputs, 0);
    StridedSlice(input_shape_, begin_, strides_, output_shape_, input, output,
                 reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    if (input_shape_.size() > MAX_DIMS) {
      MS_LOG(ERROR) << "StridedSlice support support dims less than " << input_shape_.size();
      return false;
    }

    FillEmptyDims(kernel_node);
    ParseMasks(kernel_node);
    FillOutputDim();
    null_output_ = IsNullOutput();
    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T));
    output_size_list_.push_back(output_shape_[0] * output_shape_[1] * output_shape_[2] * output_shape_[3] * sizeof(T));
  }

 private:
  void FillEmptyDims(const CNodePtr &kernel_node) {
    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
    end_ = GetAttr<std::vector<int>>(kernel_node, "end");
    strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");

    for (size_t i = 0; i < MAX_DIMS; i++) {
      if (i < begin_.size()) {
        begin_[i] =
          std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
      } else {
        begin_.push_back(0);
      }

      if (i < end_.size()) {
        end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
      } else {
        end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
      }

      if (i >= strides_.size()) {
        strides_.push_back(1);
      }

      if (i >= input_shape_.size()) {
        input_shape_.push_back(1);
      }
    }
  }

  void ParseMasks(const CNodePtr &kernel_node) {
    auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
    auto begin_mask = Dec2Bin(begin_mask_int);
    for (size_t i = 0; i < begin_mask.size(); i++) {
      if (begin_mask[i]) {
        begin_[i] = 0;
      }
    }

    auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
    auto end_mask = Dec2Bin(end_mask_int);
    for (size_t j = 0; j < end_mask.size(); j++) {
      if (end_mask[j]) {
        end_[j] = input_shape_[j];
      }
    }

    auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
    auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
    for (size_t k = 0; k < ellipsis_mask.size(); k++) {
      if (ellipsis_mask[k]) {
        begin_[k] = 0;
        end_[k] = input_shape_[k];
        strides_[k] = 1;
      }
    }

    auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
    auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
    for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
      if (shrink_axis_mask[l]) {
        end_[l] = end_[l] > begin_[l] ? begin_[l] + 1 : begin_[l] - 1;
        strides_[l] = end_[l] > begin_[l] ? 1 : -1;
      }
    }
  }

  std::vector<bool> Dec2Bin(const int &mask) {
    auto mask_str = std::bitset<MAX_DIMS>(mask).to_string();
    int dim_idx = 0;
    std::vector<bool> result = {false, false, false, false};
    for (int i = mask_str.size() - 1; i >= 0; i--) {
      if (mask_str[i] == '1') {
        result[dim_idx] = true;
      }
      dim_idx++;
    }
    return result;
  }

  void FillOutputDim() {
    for (int i = 0; i < MAX_DIMS; i++) {
      if (begin_[i] <= end_[i] && strides_[i] > 0) {
        output_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1);
      } else if (begin_[i] > end_[i] && strides_[i] < 0) {
        output_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1);
      } else {
        output_shape_.push_back(0);
      }
    }
  }

  bool IsNullOutput() {
    for (int i = 0; i < MAX_DIMS; i++) {
      if (begin_[i] >= end_[i] && strides_[i] > 0) {
        return true;
      }
      if (begin_[i] < end_[i] && strides_[i] < 0) {
        return true;
      }
    }
    return false;
  }

  std::vector<int> begin_;
  std::vector<int> end_;
  std::vector<int> strides_;
  std::vector<size_t> input_shape_;
  std::vector<int> output_shape_;
  int null_output_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GPU_KERNEL_H
```
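The mask handling above is compact but easy to misread: bit k of each mask flags dimension k. A hedged Python mirror of `Dec2Bin` and one iteration of `FillOutputDim` for sanity-checking (illustrative only, not part of the commit):

```python
MAX_DIMS = 4

def dec2bin(mask):
    # Mirrors Dec2Bin: the least-significant bit of the mask flags dimension 0.
    bits = format(mask % (1 << MAX_DIMS), '04b')  # same truncation as std::bitset<4>
    return [ch == '1' for ch in reversed(bits)]

def output_dim(begin, end, stride):
    # Mirrors one iteration of FillOutputDim. Python floor-division matches
    # C++ truncating division here for non-degenerate ranges (degenerate ones
    # are flagged by IsNullOutput before the shape is used).
    if begin <= end and stride > 0:
        return (end - 1 - begin) // stride + 1
    if begin > end and stride < 0:
        return (end - begin + 1) // stride + 1
    return 0

assert dec2bin(0b0100) == [False, False, True, False]
assert output_dim(0, 5, 3) == 2    # picks indices 0 and 3
assert output_dim(4, -1, -2) == 3  # picks indices 4, 2 and 0
```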
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.cc (new file, mode 100644)

```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad,
                      KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      StridedSliceGradGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad,
                      KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                      StridedSliceGradGpuKernel, half)
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                      StridedSliceGradGpuKernel, int)
}  // namespace kernel
}  // namespace mindspore
```
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_grad_gpu_kernel.h (new file, mode 100644)

```cpp
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H

#include <vector>
#include <bitset>
#include <algorithm>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"

namespace mindspore {
namespace kernel {
constexpr int MAX_DIMS = 4;
template <typename T>
class StridedSliceGradGpuKernel : public GpuKernel {
 public:
  StridedSliceGradGpuKernel() : null_output_(false) {}
  ~StridedSliceGradGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    T *dy = GetDeviceAddress<T>(inputs, 0);
    T *dx = GetDeviceAddress<T>(outputs, 0);
    FillDeviceArray(outputs[0]->size / sizeof(T), dx, 0.f, reinterpret_cast<cudaStream_t>(stream_ptr));
    if (null_output_) {
      return true;
    }
    StridedSliceGrad(output_shape_, begin_, strides_, input_shape_, dy, dx,
                     reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    input_shape_ = GetAttr<std::vector<int>>(kernel_node, "shapex");
    if (input_shape_.size() > MAX_DIMS) {
      MS_LOG(ERROR) << "StridedSliceGrad support support dims less than " << input_shape_.size();
      return false;
    }

    FillEmptyDims(kernel_node);
    ParseMasks(kernel_node);
    FillOutputDim();
    null_output_ = IsNullOutput();
    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(output_shape_[0] * output_shape_[1] * output_shape_[2] * output_shape_[3] * sizeof(T));
    output_size_list_.push_back(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3] * sizeof(T));
  }

 private:
  void FillEmptyDims(const CNodePtr &kernel_node) {
    begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
    end_ = GetAttr<std::vector<int>>(kernel_node, "end");
    strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");

    for (size_t i = 0; i < MAX_DIMS; i++) {
      if (i < begin_.size()) {
        begin_[i] =
          std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
      } else {
        begin_.push_back(0);
      }

      if (i < end_.size()) {
        end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
      } else {
        end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
      }

      if (i >= strides_.size()) {
        strides_.push_back(1);
      }

      if (i >= input_shape_.size()) {
        input_shape_.push_back(1);
      }
    }
  }

  void ParseMasks(const CNodePtr &kernel_node) {
    auto begin_mask_int = GetAttr<int>(kernel_node, "begin_mask");
    auto begin_mask = Dec2Bin(begin_mask_int);
    for (size_t i = 0; i < begin_mask.size(); i++) {
      if (begin_mask[i]) {
        begin_[i] = 0;
      }
    }

    auto end_mask_int = GetAttr<int>(kernel_node, "end_mask");
    auto end_mask = Dec2Bin(end_mask_int);
    for (size_t j = 0; j < end_mask.size(); j++) {
      if (end_mask[j]) {
        end_[j] = input_shape_[j];
      }
    }

    auto ellipsis_mask_int = GetAttr<int>(kernel_node, "ellipsis_mask");
    auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
    for (size_t k = 0; k < ellipsis_mask.size(); k++) {
      if (ellipsis_mask[k]) {
        begin_[k] = 0;
        end_[k] = input_shape_[k];
        strides_[k] = 1;
      }
    }

    auto shrink_axis_mask_str = GetAttr<int>(kernel_node, "shrink_axis_mask");
    auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_str);
    for (size_t l = 0; l < shrink_axis_mask.size(); l++) {
      if (shrink_axis_mask[l]) {
        end_[l] = end_[l] > begin_[l] ? begin_[l] + 1 : begin_[l] - 1;
        strides_[l] = end_[l] > begin_[l] ? 1 : -1;
      }
    }
  }

  std::vector<bool> Dec2Bin(const int &mask) {
    auto mask_str = std::bitset<MAX_DIMS>(mask).to_string();
    int dim_idx = 0;
    std::vector<bool> result = {false, false, false, false};
    for (int i = mask_str.size() - 1; i >= 0; i--) {
      if (mask_str[i] == '1') {
        result[dim_idx] = true;
      }
      dim_idx++;
    }
    return result;
  }

  void FillOutputDim() {
    for (int i = 0; i < MAX_DIMS; i++) {
      if (begin_[i] <= end_[i] && strides_[i] > 0) {
        output_shape_.push_back((end_[i] - 1 - begin_[i]) / strides_[i] + 1);
      } else if (begin_[i] > end_[i] && strides_[i] < 0) {
        output_shape_.push_back((end_[i] - begin_[i] + 1) / strides_[i] + 1);
      } else {
        output_shape_.push_back(0);
      }
    }
  }

  bool IsNullOutput() {
    for (int i = 0; i < MAX_DIMS; i++) {
      if (begin_[i] >= end_[i] && strides_[i] > 0) {
        return true;
      }
      if (begin_[i] < end_[i] && strides_[i] < 0) {
        return true;
      }
    }
    return false;
  }

  std::vector<int> begin_;
  std::vector<int> end_;
  std::vector<int> strides_;
  std::vector<int> input_shape_;
  std::vector<int> output_shape_;
  int null_output_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_STRIDED_SLICE_GRAD_GPU_KERNEL_H
```
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu (mode changed 100755 → 100644)

The old strided-slice device kernels, which copied one strided row per launch and were driven by triple-nested host loops in `CalStridedSlice`/`CalStridedSliceGrad`, are removed and replaced by flat 4-D kernels that map each output element to its input index in a single launch.

```diff
@@ -21,48 +21,29 @@
 #include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"

 template <typename T>
 __global__ void Slice4D(const int s1, const int s2, const int s3, const int s4,
                         const int l1, const int l2, const int l3, const int l4,
                         const int d1, const int d2, const int d3, const int d4,
                         const T *input, T *output) {
   for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (l1 * l2 * l3 * l4);
        pos += blockDim.x * gridDim.x) {
     int i = pos / (l2 * l3 * l4) % l1;
     int j = pos / (l3 * l4) % l2;
     int k = pos / l4 % l3;
     int o = pos % l4;
     int offset = (i + s1) * (d2 * d3 * d4) + (j + s2) * (d3 * d4) + (k + s3) * d4 + (o + s4);
     output[pos] = input[offset];
   }
 }
 template <typename T>
 __global__ void SliceGrad(const T *dy, int p, int start, int length, T *output) {
   for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (length); pos += blockDim.x * gridDim.x) {
     output[start + pos] = dy[p + pos];
   }
   return;
 }
 template <typename T>
-__global__ void StridedSlice(const T *input, int p, int start, int begin, int stride, int ended, T *output) {
-  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x;
-       pos < std::ceil(static_cast<float>(ended - begin) / stride); pos += blockDim.x * gridDim.x) {
-    output[p + pos] = input[start + pos * stride];
-  }
-  return;
-}
-template <typename T>
-__global__ void StridedSliceGrad(const T *dy, int p, int start, int begin, int stride, int ended, T *dx) {
-  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x;
-       pos < std::ceil(static_cast<float>(ended - begin) / stride); pos += blockDim.x * gridDim.x) {
-    dx[start + pos * stride] = dy[p + pos];
-  }
-  return;
-}
-template <typename T>
 __global__ void FillArray(T *addr, const size_t len, const float value) {
   T value_ = static_cast<T>(value);
   for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < len; pos += blockDim.x * gridDim.x) {
     addr[pos] = value_;
```

The second hunk, @@ -70,21 +51,20 @@, only re-wraps the `Slice4DKernel` parameter list; `FillDeviceArray`, `Slice4DKernel` and the start of `CalSliceGrad` (`int block = in_shape[1] * in_shape[2] * in_shape[3]; int map = in_shape[2] * in_shape[3]; int w = in_shape[3];`) are otherwise unchanged. The main hunk follows:

```diff
@@ -100,92 +80,100 @@ void CalSliceGrad(const size_t input_size, const T* dy, const std::vector<int> i
     }
   }
 }
 template <typename T>
-void CalStridedSlice(const size_t input_size, const T *input, const std::vector<int> in_shape,
-                     const std::vector<int> begin, const std::vector<int> end, const std::vector<int> strides,
-                     T *output, cudaStream_t cuda_stream) {
-  int block = in_shape[1] * in_shape[2] * in_shape[3];
-  int map = in_shape[2] * in_shape[3];
-  int w = in_shape[3];
-  int ended = end[3];
-  int p = 0;
-  int start = 0;
-  for (int i = begin[0]; i < ((end[0] > begin[0]) ? end[0] : (2 * begin[0] - end[0])); i += std::abs(strides[0])) {
-    for (int j = begin[1]; j < ((end[1] > begin[1]) ? end[1] : (2 * begin[1] - end[1])); j += std::abs(strides[1])) {
-      for (int k = begin[2]; k < ((end[2] > begin[2]) ? end[2] : (2 * begin[2] - end[2])); k += std::abs(strides[2])) {
-        start = (strides[0] > 0 ? i : 2 * begin[0] - i) * block + (strides[1] > 0 ? j : 2 * begin[1] - j) * map +
-                (strides[2] > 0 ? k : 2 * begin[2] - k) * w + begin[3];
-        StridedSlice<<<GET_BLOCKS(input_size), GET_THREADS, 0, cuda_stream>>>(input, p, start, begin[3], strides[3],
-                                                                              ended, output);
-        p = p + std::ceil(static_cast<float>(end[3] - begin[3]) / strides[3]);
-      }
-    }
-  }
-}
+__global__ void StridedSliceKernel(const int b0, const int b1, const int b2, const int b3, const int s0, const int s1,
+                                   const int s2, const int s3, const int i0, const int i1, const int i2, const int i3,
+                                   const int o0, const int o1, const int o2, const int o3, const T *input_addr,
+                                   T *output_addr) {
+  int output_num = o0 * o1 * o2 * o3;
+  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < output_num; pos += blockDim.x * gridDim.x) {
+    int i = pos / (o1 * o2 * o3) % o0;
+    int j = pos / (o2 * o3) % o1;
+    int k = pos / o3 % o2;
+    int l = pos % o3;
+
+    int input_idx = (i * s0 + b0) * i1 * i2 * i3 + (j * s1 + b1) * i2 * i3 + (k * s2 + b2) * i3 + (l * s3 + b3);
+    output_addr[pos] = input_addr[input_idx];
+  }
+}
 template <typename T>
-void CalStridedSliceGrad(const size_t input_size, const T *dy, const std::vector<int> in_shape,
-                         const std::vector<int> begin, const std::vector<int> end, const std::vector<int> strides,
-                         T *dx, cudaStream_t cuda_stream) {
-  int block = in_shape[1] * in_shape[2] * in_shape[3];
-  int map = in_shape[2] * in_shape[3];
-  int w = in_shape[3];
-  int ended = end[3];
-  int p = 0;
-  int start = 0;
-  for (int i = begin[0]; i < ((end[0] > begin[0]) ? end[0] : (2 * begin[0] - end[0] + 1)); i += std::abs(strides[0])) {
-    for (int j = begin[1]; j < ((end[1] > begin[1]) ? end[1] : (2 * begin[1] - end[1] + 1));
-         j += std::abs(strides[1])) {
-      for (int k = begin[2]; k < ((end[2] > begin[2]) ? end[2] : (2 * begin[2] - end[2] + 1));
-           k += std::abs(strides[2])) {
-        start = (strides[0] > 0 ? i : 2 * begin[0] - i) * block + (strides[1] > 0 ? j : 2 * begin[1] - j) * map +
-                (strides[2] > 0 ? k : 2 * begin[2] - k) * w + begin[3];
-        StridedSliceGrad<<<GET_BLOCKS(input_size), GET_THREADS, 0, cuda_stream>>>(dy, p, start, begin[3], strides[3],
-                                                                                  ended, dx);
-        p = p + std::ceil(static_cast<float>(end[3] - begin[3]) / strides[3]);
-      }
-    }
-  }
-}
+void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
+                  const std::vector<int> &strides, const std::vector<int> &output_shape, const T *input, T *output,
+                  cudaStream_t cuda_stream) {
+  int size = output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3];
+  StridedSliceKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(
+    begin[0], begin[1], begin[2], begin[3], strides[0], strides[1], strides[2], strides[3], input_shape[0],
+    input_shape[1], input_shape[2], input_shape[3], output_shape[0], output_shape[1], output_shape[2], output_shape[3],
+    input, output);
+}
+template <typename T>
+__global__ void StridedSliceGradKernel(const int b0, const int b1, const int b2, const int b3, const int s0,
+                                       const int s1, const int s2, const int s3, const int i0, const int i1,
+                                       const int i2, const int i3, const int o0, const int o1, const int o2,
+                                       const int o3, const T *dy, T *dx) {
+  int output_num = o0 * o1 * o2 * o3;
+  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < output_num; pos += blockDim.x * gridDim.x) {
+    int i = pos / (o1 * o2 * o3) % o0;
+    int j = pos / (o2 * o3) % o1;
+    int k = pos / o3 % o2;
+    int l = pos % o3;
+
+    int input_idx = (i * s0 + b0) * i1 * i2 * i3 + (j * s1 + b1) * i2 * i3 + (k * s2 + b2) * i3 + (l * s3 + b3);
+    dx[input_idx] = dy[pos];
+  }
+  return;
+}
+template <typename T>
+void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin, const std::vector<int> &strides,
+                      const std::vector<int> &dx_shape, const T *dy, T *dx, cudaStream_t cuda_stream) {
+  int size = dy_shape[0] * dy_shape[1] * dy_shape[2] * dy_shape[3];
+  StridedSliceGradKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(
+    begin[0], begin[1], begin[2], begin[3], strides[0], strides[1], strides[2], strides[3], dx_shape[0], dx_shape[1],
+    dx_shape[2], dx_shape[3], dy_shape[0], dy_shape[1], dy_shape[2], dy_shape[3], dy, dx);
+}

 template void FillDeviceArray<float>(const size_t input_size, float *addr, const float value,
                                      cudaStream_t cuda_stream);
 template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
                             const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
                             const float *input, float *output, cudaStream_t stream);
 template void CalSliceGrad<float>(const size_t input_size, const float *dy, const std::vector<int> in_shape,
                                   const std::vector<int> begin, const std::vector<int> size, float *output,
                                   cudaStream_t cuda_stream);
-template void CalStridedSlice<float>(const size_t input_size, const float *input, const std::vector<int> in_shape,
-                                     const std::vector<int> begin, const std::vector<int> end,
-                                     const std::vector<int> strides, float *output, cudaStream_t cuda_stream);
-template void CalStridedSliceGrad<float>(const size_t input_size, const float *dy, const std::vector<int> in_shape,
-                                         const std::vector<int> begin, const std::vector<int> end,
-                                         const std::vector<int> strides, float *dx, cudaStream_t cuda_stream);
 template void FillDeviceArray<half>(const size_t input_size, half *addr, const float value, cudaStream_t cuda_stream);
 template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
                             const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
                             const half *input, half *output, cudaStream_t stream);
 template void CalSliceGrad<half>(const size_t input_size, const half *dy, const std::vector<int> in_shape,
                                  const std::vector<int> begin, const std::vector<int> size, half *output,
                                  cudaStream_t cuda_stream);
-template void CalStridedSlice<half>(const size_t input_size, const half *input, const std::vector<int> in_shape,
-                                    const std::vector<int> begin, const std::vector<int> end,
-                                    const std::vector<int> strides, half *output, cudaStream_t cuda_stream);
-template void CalStridedSliceGrad<half>(const size_t input_size, const half *dy, const std::vector<int> in_shape,
-                                        const std::vector<int> begin, const std::vector<int> end,
-                                        const std::vector<int> strides, half *dx, cudaStream_t cuda_stream);
 template void FillDeviceArray<int>(const size_t input_size, int *addr, const float value, cudaStream_t cuda_stream);
 template void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2,
                             const int l3, const int l4, const int d1, const int d2, const int d3, const int d4,
                             const int *input, int *output, cudaStream_t stream);
 template void CalSliceGrad<int>(const size_t input_size, const int *dy, const std::vector<int> in_shape,
                                 const std::vector<int> begin, const std::vector<int> size, int *output,
                                 cudaStream_t cuda_stream);
-template void CalStridedSlice<int>(const size_t input_size, const int *input, const std::vector<int> in_shape,
-                                   const std::vector<int> begin, const std::vector<int> end,
-                                   const std::vector<int> strides, int *output, cudaStream_t cuda_stream);
-template void CalStridedSliceGrad<int>(const size_t input_size, const int *dy, const std::vector<int> in_shape,
-                                       const std::vector<int> begin, const std::vector<int> end,
-                                       const std::vector<int> strides, int *dx, cudaStream_t cuda_stream);
+template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
+                           const std::vector<int> &strides, const std::vector<int> &output_shape, const float *input,
+                           float *output, cudaStream_t cuda_stream);
+template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
+                           const std::vector<int> &strides, const std::vector<int> &output_shape, const half *input,
+                           half *output, cudaStream_t cuda_stream);
+template void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
+                           const std::vector<int> &strides, const std::vector<int> &output_shape, const int *input,
+                           int *output, cudaStream_t cuda_stream);
+template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
+                               const std::vector<int> &strides, const std::vector<int> &dx_shape, const float *dy,
+                               float *dx, cudaStream_t cuda_stream);
+template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
+                               const std::vector<int> &strides, const std::vector<int> &dx_shape, const half *dy,
+                               half *dx, cudaStream_t cuda_stream);
+template void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
+                               const std::vector<int> &strides, const std::vector<int> &dx_shape, const int *dy,
+                               int *dx, cudaStream_t cuda_stream);
```
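The heart of the new kernels is the flat-index arithmetic: each output position `pos` is unflattened into 4-D coordinates `(i, j, k, l)`, which map to the input element at `begin + coordinate * stride` on each axis. A NumPy cross-check of that arithmetic (illustrative only, not part of the commit):

```python
import numpy as np

def strided_slice_ref(x, begin, strides, out_shape):
    # Mirrors StridedSliceKernel's index math (i0 is unused, as in the kernel).
    i0, i1, i2, i3 = x.shape
    o0, o1, o2, o3 = out_shape
    out = np.empty(out_shape, dtype=x.dtype)
    for pos in range(o0 * o1 * o2 * o3):
        i = pos // (o1 * o2 * o3) % o0
        j = pos // (o2 * o3) % o1
        k = pos // o3 % o2
        l = pos % o3
        input_idx = ((i * strides[0] + begin[0]) * i1 * i2 * i3 +
                     (j * strides[1] + begin[1]) * i2 * i3 +
                     (k * strides[2] + begin[2]) * i3 +
                     (l * strides[3] + begin[3]))
        out.flat[pos] = x.flat[input_idx]
    return out

x = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5).astype(np.float32)

ref = strided_slice_ref(x, (1, 0, 0, 2), (1, 1, 1, 1), (1, 2, 2, 2))
assert (ref == x[1:2, 0:2, 0:2, 2:4]).all()

# Negative strides run through the same formula: begin points at the last
# element taken (FillEmptyDims clamps begin=5 down to 4) and the stride walks back.
ref = strided_slice_ref(x, (1, 0, 0, 4), (1, 1, 1, -2), (1, 2, 2, 2))
assert (ref == x[1:2, 0:2, 0:2, 4::-2][:, :, :, :2]).all()
```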
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh (mode changed 100755 → 100644)

The declarations follow suit: `CalStridedSlice`/`CalStridedSliceGrad` are replaced by the new `StridedSlice`/`StridedSliceGrad` entry points, and the `Slice4DKernel` declaration is re-wrapped:

```diff
@@ -21,23 +21,20 @@
 #include <vector>
 #include "runtime/device/gpu/cuda_common.h"

 template <typename T>
 void Slice4DKernel(const int s1, const int s2, const int s3, const int s4, const int l1, const int l2, const int l3,
-                   const int l4,
-                   const int d1, const int d2, const int d3, const int d4, const T *input, T *output,
-                   cudaStream_t stream);
+                   const int l4, const int d1, const int d2, const int d3, const int d4, const T *input, T *output,
+                   cudaStream_t stream);
 template <typename T>
 void CalSliceGrad(const size_t input_size, const T *input, const std::vector<int> in_shape,
                   const std::vector<int> begin, const std::vector<int> size, T *output, cudaStream_t cuda_stream);
 template <typename T>
-void CalStridedSlice(const size_t input_size, const T *input, const std::vector<int> in_shape,
-                     const std::vector<int> begin, const std::vector<int> end, const std::vector<int> strides,
-                     T *output, cudaStream_t cuda_stream);
+void StridedSlice(const std::vector<size_t> &input_shape, const std::vector<int> &begin,
+                  const std::vector<int> &strides, const std::vector<int> &output_shape, const T *input, T *output,
+                  cudaStream_t cuda_stream);
 template <typename T>
-void CalStridedSliceGrad(const size_t input_size, const T *dy, const std::vector<int> in_shape,
-                         const std::vector<int> begin, const std::vector<int> end, const std::vector<int> strides,
-                         T *dx, cudaStream_t cuda_stream);
+void StridedSliceGrad(const std::vector<int> &dy_shape, const std::vector<int> &begin,
+                      const std::vector<int> &strides, const std::vector<int> &dx_shape, const T *dy, T *dx,
+                      cudaStream_t cuda_stream);
 template <typename T>
 void FillDeviceArray(const size_t input_size, T *addr, const float value, cudaStream_t cuda_stream);

 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SLICEIMPL_H_
```
tests/st/ops/gpu/test_stridedslice_grad_op.py

The hunk @@ -19,31 +19,258 @@ replaces the old smoke test, which called `G.StridedSliceGrad` directly. Removed:

```python
from mindspore.common.api import ms_function
from mindspore.ops.operations import _grad_ops as G


class StridedSliceGrad(nn.Cell):
    def __init__(self):
        super(StridedSliceGrad, self).__init__()
        self.ssg = G.StridedSliceGrad()
        self.shape = P.Shape()

    @ms_function
    def construct(self, dy, x):
        return self.ssg(dy, self.shape(x), (2, 0, 0), (3, 2, 3), (1, 1, 1))


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_slice():
    x = Tensor(np.array([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 7, 8]]]).astype(np.float32))
    dy = Tensor(np.array([[[5., 1., 5.], [6., 1., 8.]]]).astype(np.float32))
    ssg = StridedSliceGrad()
    output = ssg(dy, x)
    expect = [[[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]], [[5, 1, 5], [6, 1, 8]]]
    assert (output.asnumpy() == expect).all()
```

Added in its place, a gradient test that drives the backward pass through `C.GradOperation`:

```python
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops import composite as C

context.set_context(mode=context.GRAPH_MODE, device_target='GPU')


class StridedSliceNet(nn.Cell):
    def __init__(self, begin, end, stride, begin_mask=0, end_mask=0, ellipsis_mask=0):
        super(StridedSliceNet, self).__init__()
        self.begin = begin
        self.end = end
        self.strides = stride
        self.slice = P.StridedSlice(begin_mask, end_mask, ellipsis_mask)

    def construct(self, x):
        return self.slice(x, self.begin, self.end, self.strides)


class GradData(nn.Cell):
    def __init__(self, network):
        super(GradData, self).__init__()
        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=False)
        self.network = network

    def construct(self, x):
        return self.grad(self.network)(x)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_strided_slice_grad():
    x = Tensor(np.arange(0, 2 * 3 * 4 * 5).reshape(2, 3, 4, 5).astype(np.float32))

    net = StridedSliceNet((1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1))
    dx = GradData(net)(x)
    expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
                       [[[0., 0., 1., 1., 0.], [0., 0., 1., 1., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 1., 1., 0.], [0., 0., 1., 1., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]]])
    assert np.allclose(dx[0].asnumpy(), expect)

    net = StridedSliceNet((1, 0, 0, 5), (2, 2, 2, 1), (1, 1, 1, -2))
    dx = GradData(net)(x)
    expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
                       [[[0., 0., 1., 0., 1.], [0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 1., 0., 1.], [0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]]])
    assert np.allclose(dx[0].asnumpy(), expect)

    net = StridedSliceNet((1, 0, 0, -1), (2, 2, 2, 1), (1, 1, 1, -1))
    dx = GradData(net)(x)
    expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
                       [[[0., 0., 1., 1., 1.], [0., 0., 1., 1., 1.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 1., 1., 1.], [0., 0., 1., 1., 1.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]]])
    assert np.allclose(dx[0].asnumpy(), expect)

    # ME infer fault
    # y = GradData()(x, (1, 0, -1, -2), (2, 2, 0, -5), (1, 1, -1, -2))
    # expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
    #                    [[[0., 0., 0., 0., 0.], [0., 1., 0., 1., 0.], [0., 1., 0., 1., 0.], [0., 1., 0., 1., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 1., 0., 1., 0.], [0., 1., 0., 1., 0.], [0., 1., 0., 1., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]]])
    # assert np.allclose(y.asnumpy(), expect)

    # y = Grad(begin_mask=0b1000, end_mask=0b0010)(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1))
    # expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
    #                    [[[0., 0., 1., 1., 0.], [0., 0., 1., 1., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 1., 1., 0.], [0., 0., 1., 1., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
    #                     [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]]])
    # assert np.allclose(y.asnumpy(), expect)

    net = StridedSliceNet((1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1),
                          begin_mask=0b1000, end_mask=0b0010, ellipsis_mask=0b0100)
    dx = GradData(net)(x)
    expect = np.array([[[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                        [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]],
                       [[[1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.]],
                        [[1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.]],
                        [[1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.], [1., 1., 1., 1., 0.]]]])
    assert np.allclose(dx[0].asnumpy(), expect)

    x = Tensor(np.arange(0, 3 * 4 * 5).reshape(3, 4, 5).astype(np.float32))
    net = StridedSliceNet((1, 0, 0), (2, -3, 3), (1, 1, 3))
    dx = GradData(net)(x)
    expect = np.array([[[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                       [[1., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]],
                       [[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]])
    assert np.allclose(dx[0].asnumpy(), expect)
```
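All the expected gradients above follow one rule, which is exactly what `FillDeviceArray` plus `StridedSliceGradKernel` implement: dx starts as zeros, and each element of dy is scattered back to the input position its forward slice read from. A small NumPy illustration (not part of the commit):

```python
import numpy as np

x_shape = (2, 3, 4, 5)
dy = np.ones((1, 2, 2, 2), dtype=np.float32)  # incoming gradient, shaped like the slice

dx = np.zeros(x_shape, dtype=np.float32)      # FillDeviceArray(dx, 0.f)
dx[1:2, 0:2, 0:2, 2:4] = dy                   # scatter for begin=(1,0,0,2), strides=(1,1,1,1)
assert dx.sum() == dy.size                    # each dy element lands exactly once
```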
tests/st/ops/gpu/test_stridedslice_op.py

The hunk @@ -17,29 +17,79 @@ replaces the old single-case test. Removed:

```python
import mindspore.nn as nn


class StridedSlice(nn.Cell):
    def __init__(self):
        super(StridedSlice, self).__init__()
        self.stridedslice = P.StridedSlice()

    def construct(self, x):
        return self.stridedslice(x, (2, 0, 0), (3, 2, 3), (1, 1, 1))


def test_slice():
    x = Tensor(np.array([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 7, 8]]]).astype(np.int32))
    stridedslice = StridedSlice()
    output = stridedslice(x)
    expect = [[[5., 5., 5.], [6., 7., 8.]]]
    assert (output.asnumpy() == expect).all()
```

Added in its place:

```python
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target='GPU')


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_stridedslice():
    x = Tensor(np.arange(0, 2 * 3 * 4 * 5).reshape(2, 3, 4, 5).astype(np.float32))
    y = P.StridedSlice()(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1))
    expect = np.array([[[[62, 63], [67, 68]],
                        [[82, 83], [87, 88]]]])
    assert np.allclose(y.asnumpy(), expect)

    y = P.StridedSlice()(x, (1, 0, 0, 5), (2, 2, 2, 1), (1, 1, 1, -2))
    expect = np.array([[[[64, 62], [69, 67]],
                        [[84, 82], [89, 87]]]])
    assert np.allclose(y.asnumpy(), expect)

    y = P.StridedSlice()(x, (1, 0, 0, -1), (2, 2, 2, 1), (1, 1, 1, -1))
    expect = np.array([[[[64, 63, 62], [69, 68, 67]],
                        [[84, 83, 82], [89, 88, 87]]]])
    assert np.allclose(y.asnumpy(), expect)

    # ME infer fault
    # y = P.StridedSlice()(x, (1, 0, -1, -2), (2, 2, 0, -5), (1, 1, -1, -2))
    # expect = np.array([[[[78, 76], [73, 71], [68, 66]],
    #                     [[98, 96], [93, 91], [88, 86]]]])
    # assert np.allclose(y.asnumpy(), expect)

    # y = P.StridedSlice(begin_mask=0b1000, end_mask=0b0010)(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1))
    # expect = np.array([[[[62, 63], [67, 68]],
    #                     [[82, 83], [87, 88]],
    #                     [[102, 103], [107, 108]]]])
    # assert np.allclose(y.asnumpy(), expect)

    op = P.StridedSlice(begin_mask=0b1000, end_mask=0b0010, ellipsis_mask=0b0100)
    y = op(x, (1, 0, 0, 2), (2, 2, 2, 4), (1, 1, 1, 1))
    expect = np.array([[[[60, 61, 62, 63], [65, 66, 67, 68], [70, 71, 72, 73], [75, 76, 77, 78]],
                        [[80, 81, 82, 83], [85, 86, 87, 88], [90, 91, 92, 93], [95, 96, 97, 98]],
                        [[100, 101, 102, 103], [105, 106, 107, 108], [110, 111, 112, 113], [115, 116, 117, 118]]]])
    assert np.allclose(y.asnumpy(), expect)

    x = Tensor(np.arange(0, 3 * 4 * 5).reshape(3, 4, 5).astype(np.float32))
    y = P.StridedSlice()(x, (1, 0, 0), (2, -3, 3), (1, 1, 3))
    expect = np.array([[[20]]])
    assert np.allclose(y.asnumpy(), expect)

    x_np = np.arange(0, 4 * 5).reshape(4, 5).astype(np.float32)
    y = Tensor(x_np)[:, ::-1]
    expect = x_np[:, ::-1]
    assert np.allclose(y.asnumpy(), expect)
```
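As a cross-check on the mask case above (illustrative, not part of the commit): per the `ParseMasks` logic, begin_mask=0b1000 zeroes the begin on axis 3, end_mask=0b0010 resets the end on axis 1 to the full dimension, and ellipsis_mask=0b0100 takes all of axis 2, so the op reduces to plain NumPy indexing:

```python
import numpy as np

x = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5).astype(np.float32)
# begin=(1, 0, 0, 2), end=(2, 2, 2, 4), strides=(1, 1, 1, 1), after the masks apply:
expect = x[1:2, 0:3, 0:4, 0:4]
assert expect.shape == (1, 3, 4, 4)
assert expect[0, 0, 0, 0] == 60 and expect[0, 2, 3, 3] == 118  # matches the array in the test
```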