magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit f679568d, authored on Jul 21, 2020 by linqingke
gpu ops code and test case.
Parent: ca6756b5
Showing 30 changed files with 1908 additions and 12 deletions (+1908 -12)
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc  +33 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.h  +162 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc  +33 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h  +175 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cu  +81 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh  +27 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cu  +62 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh  +26 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu  +32 -8
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh  +1 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu  +65 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cuh  +26 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu  +68 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh  +26 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cu  +57 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cuh  +25 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc  +8 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h  +4 -4
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.cc  +32 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.h  +88 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.cc  +26 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.h  +152 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.cc  +26 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.h  +143 -0
tests/st/ops/gpu/test_boundingbox_decode_op.py  +60 -0
tests/st/ops/gpu/test_boundingbox_encode_op.py  +80 -0
tests/st/ops/gpu/test_floordiv_op.py  +116 -0
tests/st/ops/gpu/test_gathernd_op.py  +151 -0
tests/st/ops/gpu/test_scatter_nd.py  +50 -0
tests/st/ops/gpu/test_sgd_op.py  +73 -0
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.cc
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(
  GatherNd,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
  GatherNdGpuFwdKernel, float, int)
MS_REG_GPU_KERNEL_TWO(
  GatherNd,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16),
  GatherNdGpuFwdKernel, half, int)
MS_REG_GPU_KERNEL_TWO(
  GatherNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  GatherNdGpuFwdKernel, int, int)
}  // namespace kernel
}  // namespace mindspore
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gathernd_gpu_kernel.h
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_GATHERND_GPU_KERNEL_H
#define MINDSPORE_GATHERND_GPU_KERNEL_H
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/gathernd.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename S>
class GatherNdGpuFwdKernel : public GpuKernel {
 public:
  GatherNdGpuFwdKernel() : dev_batch_strides_(nullptr), dev_batch_indices_(nullptr) {}
  ~GatherNdGpuFwdKernel() {
    if (dev_batch_strides_ != nullptr) {
      device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(dev_batch_strides_));
    }
    if (dev_batch_indices_ != nullptr) {
      device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(dev_batch_indices_));
    }
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    VARIABLE_NOT_USED(workspace);
    T *input_addr = GetDeviceAddress<T>(inputs, 0);
    S *indices_addr = GetDeviceAddress<S>(inputs, 1);
    T *output_addr = GetDeviceAddress<T>(outputs, 0);

    GatherNd(input_addr, indices_addr, output_addr, dims_[0], dims_[1], dims_[2], dev_batch_strides_,
             dev_batch_indices_, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    InitResource();
    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
    if (input_num != 2) {
      MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherNdGpuFwdKernel needs 2.";
    }
    input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

    Reshape();

    size_t dim_indices_last = dims_[dims_.size() - 1];
    batch_strides_.resize(dim_indices_last, 0);
    batch_indices_.resize(dim_indices_last, 0);

    if (dim_indices_last > 0) {
      batch_strides_[dim_indices_last - 1] = input_shapes_[dim_indices_last - 1];
      batch_indices_[dim_indices_last - 1] = dims_[1];
    }
    for (size_t i = dim_indices_last - 1; i > 0; --i) {
      batch_strides_[i - 1] = input_shapes_[i - 1];
      batch_indices_[i - 1] = batch_indices_[i] * input_shapes_[i];
    }

    size_t strides_len = sizeof(S) * batch_strides_.size();
    void *dev_batch_strides_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(strides_len);
    if (dev_batch_strides_work == nullptr) {
      MS_LOG(EXCEPTION) << "Failed to alloc dev_batch_strides_work, size: " << strides_len;
    }
    dev_batch_strides_ = static_cast<S *>(dev_batch_strides_work);

    size_t indices_len = sizeof(S) * batch_indices_.size();
    void *dev_batch_indices_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
    if (dev_batch_indices_work == nullptr) {
      MS_LOG(EXCEPTION) << "Failed to alloc dev_batch_indices_work, size: " << indices_len;
    }
    dev_batch_indices_ = static_cast<S *>(dev_batch_indices_work);

    CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpy(dev_batch_strides_, &batch_strides_[0], strides_len, cudaMemcpyHostToDevice),
                               "cudaMemcpy failed in GatherNdGpuFwdKernel::Init.");
    CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpy(dev_batch_indices_, &batch_indices_[0], indices_len, cudaMemcpyHostToDevice),
                               "cudaMemcpy failed in GatherNdGpuFwdKernel::Init.");

    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    size_t size = GetSize(input_shapes_);
    input_size_list_.push_back(size);

    size = GetSize(indices_shapes_);
    input_size_list_.push_back(size);

    size = GetSize(output_shapes_);
    output_size_list_.push_back(size);
  }

 private:
  void Reshape() {
    size_t dim_of_indices = 1;
    for (size_t i = 0; i < indices_shapes_.size() - IntToSize(1); i++) {
      dim_of_indices *= indices_shapes_[i];
    }

    size_t dim_after_indices = 1;
    size_t dim_indices_last = indices_shapes_[indices_shapes_.size() - IntToSize(1)];
    for (size_t i = dim_indices_last; i < input_shapes_.size(); i++) {
      dim_after_indices *= input_shapes_[i];
    }
    dims_.emplace_back(dim_of_indices);
    dims_.emplace_back(dim_after_indices);
    dims_.emplace_back(dim_indices_last);
    return;
  }

  size_t GetSize(const std::vector<size_t> &shape) const {
    if (shape.size() == 0) {
      return 0;
    }
    size_t result = sizeof(T);
    for (size_t i = 0; i < shape.size(); i++) {
      result *= shape[i];
    }
    return result;
  }

  std::vector<size_t> input_shapes_;
  std::vector<size_t> indices_shapes_;
  std::vector<size_t> output_shapes_;
  std::vector<size_t> dims_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;

  std::vector<S> batch_strides_;
  std::vector<S> batch_indices_;

  S *dev_batch_strides_;
  S *dev_batch_indices_;
};
}  // namespace kernel
}  // namespace mindspore
#endif // MINDSPORE_GATHERND_GPU_KERNEL_H
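Note (not part of the diff): Init() above precomputes two small device-side tables: batch_strides_ holds the size of each indexed input dimension (used by the kernel's out-of-bound check) and batch_indices_ holds that dimension's row-major element stride. A minimal host-side sketch of the same computation, assuming an input shape {5, 6, 7} and indices shape {4, 2} chosen purely for illustration:

// Standalone check of the GatherNd stride/bound computation (illustrative shapes assumed).
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<size_t> input_shapes = {5, 6, 7};
  std::vector<size_t> indices_shapes = {4, 2};

  // Reshape(): dims_ = {number of index rows, slice size after the indexed dims, index depth}.
  size_t dim_of_indices = 1;
  for (size_t i = 0; i < indices_shapes.size() - 1; i++) dim_of_indices *= indices_shapes[i];
  size_t dim_indices_last = indices_shapes.back();
  size_t dim_after_indices = 1;
  for (size_t i = dim_indices_last; i < input_shapes.size(); i++) dim_after_indices *= input_shapes[i];
  std::vector<size_t> dims = {dim_of_indices, dim_after_indices, dim_indices_last};  // {4, 7, 2}

  // Init(): per-dimension bounds (batch_strides_) and row-major element strides (batch_indices_).
  std::vector<int> bounds(dim_indices_last, 0), strides(dim_indices_last, 0);
  bounds[dim_indices_last - 1] = input_shapes[dim_indices_last - 1];  // 6
  strides[dim_indices_last - 1] = dims[1];                            // 7
  for (size_t i = dim_indices_last - 1; i > 0; --i) {
    bounds[i - 1] = input_shapes[i - 1];
    strides[i - 1] = strides[i] * input_shapes[i];
  }
  assert(bounds[0] == 5 && bounds[1] == 6);
  assert(strides[0] == 42 && strides[1] == 7);  // element (a, b, c) of a {5, 6, 7} tensor lives at a*42 + b*7 + c
  return 0;
}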
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.cc
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ScatterNdGpuFwdKernel, float, int)
MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  ScatterNdGpuFwdKernel, half, int)
MS_REG_GPU_KERNEL_TWO(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ScatterNdGpuFwdKernel, int, int)
}  // namespace kernel
}  // namespace mindspore
mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_gpu_kernel.h
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_SCATTER_ND_GPU_KERNEL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_SCATTER_ND_GPU_KERNEL_H
#include <vector>
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
template <typename T, typename S>
class ScatterNdGpuFwdKernel : public GpuKernel {
 public:
  ScatterNdGpuFwdKernel()
      : input_size_(1),
        indices_size_(1),
        output_size_(1),
        block_size_(1),
        indices_stride_(nullptr),
        work_shape_(nullptr),
        indices_dim_0_(0),
        indices_dim_1_(0) {}
  ~ScatterNdGpuFwdKernel() {
    if (indices_stride_ != nullptr) {
      device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));
    }
    if (work_shape_ != nullptr) {
      device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(work_shape_));
    }
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    VARIABLE_NOT_USED(workspace);
    S *indices = GetDeviceAddress<S>(inputs, 0);
    T *update = GetDeviceAddress<T>(inputs, 1);
    T *output = GetDeviceAddress<T>(outputs, 0);

    ScatterNd(indices, update, output, block_size_, input_size_, output_size_, indices_dim_0_, indices_dim_1_,
              indices_stride_, work_shape_, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
    if (input_num != 2) {
      MS_LOG(ERROR) << "Input number is " << input_num << ", but transpose needs 2 input.";
      return false;
    }
    size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
    if (output_num != 1) {
      MS_LOG(ERROR) << "Output number is " << output_num << ", but transpose needs 1 output.";
      return false;
    }

    input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);

    vec_work_shape_ = GetAttr<std::vector<S>>(kernel_node, "shape");

    GetSize();

    size_t indices_len = sizeof(S) * vec_indices_stride_.size();
    void *indices_stride_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(indices_len);
    if (indices_stride_work == nullptr) {
      MS_LOG(EXCEPTION) << "Failed to alloc indices_stride_work, size: " << indices_len;
    }
    indices_stride_ = static_cast<S *>(indices_stride_work);

    size_t vec_work_len = sizeof(S) * vec_work_shape_.size();
    void *work_shape_work = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(vec_work_len);
    if (work_shape_work == nullptr) {
      MS_LOG(EXCEPTION) << "Failed to alloc work_shape_work, size: " << vec_work_len;
    }
    work_shape_ = static_cast<S *>(work_shape_work);

    CHECK_CUDA_RET_WITH_EXCEPT(
      cudaMemcpy(indices_stride_, &vec_indices_stride_[0], indices_len, cudaMemcpyHostToDevice),
      "cudaMemcpy failed in ScatterNdGpuFwdKernel::Init.");
    CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpy(work_shape_, &vec_work_shape_[0], vec_work_len, cudaMemcpyHostToDevice),
                               "cudaMemcpy failed in ScatterNdGpuFwdKernel::Init.");

    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(indices_size_);
    input_size_list_.push_back(input_size_);
    output_size_list_.push_back(output_size_);
    return;
  }

  void GetSize() {
    indices_size_ = sizeof(S);
    for (size_t i = 0; i < indices_shapes_.size(); i++) {
      indices_size_ *= indices_shapes_[i];
    }
    input_size_ = sizeof(T);
    for (size_t i = 0; i < input_shapes_.size(); i++) {
      input_size_ *= input_shapes_[i];
    }
    output_size_ = sizeof(T);
    for (size_t i = 0; i < output_shapes_.size(); i++) {
      output_size_ *= output_shapes_[i];
    }

    // calculate indices dim 0/1
    indices_dim_0_ = indices_shapes_[0];
    indices_dim_1_ = indices_shapes_[1];

    // calculate block_size
    for (size_t i = indices_dim_1_; i < output_shapes_.size(); i++) {
      block_size_ *= output_shapes_[i];
    }

    // calculate indices_stride
    for (size_t i = 0; i < indices_dim_1_; i++) {
      vec_indices_stride_.push_back(0);
    }
    vec_indices_stride_[indices_dim_1_ - 1] = block_size_;

    for (size_t i = indices_dim_1_ - 1; i > 0; --i) {
      vec_indices_stride_[i - 1] = vec_indices_stride_[i] * output_shapes_[i];
    }
  }

 private:
  std::vector<size_t> input_shapes_;
  std::vector<size_t> indices_shapes_;
  std::vector<size_t> output_shapes_;
  std::vector<S> vec_indices_stride_;
  std::vector<S> vec_work_shape_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;

  size_t input_size_;
  size_t indices_size_;
  size_t output_size_;
  size_t block_size_;

  S *indices_stride_;
  S *work_shape_;

  size_t indices_dim_0_;
  size_t indices_dim_1_;
};
}  // namespace kernel
}  // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_GPU_SCATTER_ND_GPU_KERNEL_H
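Note (not part of the diff): GetSize() above derives block_size_ (the number of contiguous output elements written per index row) and vec_indices_stride_ (the row-major strides of the indexed output dimensions). A minimal host-side sketch, assuming an output shape {4, 5, 6} and index rows of length 2 chosen purely for illustration:

// Standalone check of the ScatterNd block-size/stride computation (illustrative shapes assumed).
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<size_t> output_shapes = {4, 5, 6};
  size_t indices_dim_1 = 2;  // each index row addresses the first two output dimensions

  size_t block_size = 1;
  for (size_t i = indices_dim_1; i < output_shapes.size(); i++) block_size *= output_shapes[i];  // 6

  std::vector<int> indices_stride(indices_dim_1, 0);
  indices_stride[indices_dim_1 - 1] = block_size;  // 6
  for (size_t i = indices_dim_1 - 1; i > 0; --i) {
    indices_stride[i - 1] = indices_stride[i] * output_shapes[i];  // 6 * 5 = 30
  }

  assert(block_size == 6 && indices_stride[0] == 30 && indices_stride[1] == 6);
  // An index row (a, b) scatters a contiguous block of 6 update values starting at offset a*30 + b*6.
  return 0;
}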
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cu
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
template <typename T>
__global__ void BoundingBoxDecodeKernel(const size_t size, const T *rois, const T *deltas, T *bboxes, const float m1,
                                        const float m2, const float m3, const float m4, const float s1, const float s2,
                                        const float s3, const float s4, const int max_height, const int max_width,
                                        const float ratio_clip) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {
    const size_t left_x = i * 4;
    const size_t left_y = i * 4 + 1;
    const size_t right_x = i * 4 + 2;
    const size_t right_y = i * 4 + 3;

    T dx = deltas[left_x] * s1 + m1;
    T dy = deltas[left_y] * s2 + m2;
    T dw = deltas[right_x] * s3 + m3;
    T dh = deltas[right_y] * s4 + m4;

    T max_ratio = abs(log(ratio_clip));

    dw = dw > max_ratio ? max_ratio : (dw < (-max_ratio) ? (-max_ratio) : dw);
    dh = dh > max_ratio ? max_ratio : (dh < (-max_ratio) ? (-max_ratio) : dh);

    T px = (rois[left_x] + rois[right_x]) * 0.5f;
    T py = (rois[left_y] + rois[right_y]) * 0.5f;
    T pw = rois[right_x] - rois[left_x] + 1.0f;
    T ph = rois[right_y] - rois[left_y] + 1.0f;

    T gx = px + pw * dx;
    T gy = py + ph * dy;
    T gw = pw * exp(dw);
    T gh = ph * exp(dh);

    T x1 = gx - gw * 0.5f + 0.5f;
    T y1 = gy - gh * 0.5f + 0.5f;
    T x2 = gx + gw * 0.5f - 0.5f;
    T y2 = gy + gh * 0.5f - 0.5f;

    x1 = x1 > max_width ? max_width : (x1 < 0 ? 0 : x1);
    y1 = y1 > max_height ? max_height : (y1 < 0 ? 0 : y1);
    x2 = x2 > max_width ? max_width : (x2 < 0 ? 0 : x2);
    y2 = y2 > max_height ? max_height : (y2 < 0 ? 0 : y2);

    bboxes[left_x] = x1;
    bboxes[left_y] = y1;
    bboxes[right_x] = x2;
    bboxes[right_y] = y2;
  }
}

template <typename T>
void BoundingBoxDecode(const size_t size, const T *rois, const T *deltas, T *bboxes, const float &m1, const float &m2,
                       const float &m3, const float &m4, const float &s1, const float &s2, const float &s3,
                       const float &s4, const int &max_height, const int &max_width, const float &ratio_clip,
                       cudaStream_t cuda_stream) {
  BoundingBoxDecodeKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, rois, deltas, bboxes, m1, m2, m3,
                                                                             m4, s1, s2, s3, s4, max_height, max_width,
                                                                             ratio_clip);
}

template void BoundingBoxDecode<float>(const size_t size, const float *rois, const float *deltas, float *bboxes,
                                       const float &m1, const float &m2, const float &m3, const float &m4,
                                       const float &s1, const float &s2, const float &s3, const float &s4,
                                       const int &max_height, const int &max_width, const float &ratio_clip,
                                       cudaStream_t cuda_stream);
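Note (not part of the diff): BoundingBoxDecodeKernel is the usual delta-to-box transform. With deltas \(\delta\), per-coordinate means \(m_k\), stds \(s_k\), and ratio clip \(r\), it computes (x/w shown; y/h analogous):

\[
\begin{aligned}
d_x &= \delta_x s_1 + m_1, \qquad d_w = \operatorname{clamp}(\delta_w s_3 + m_3,\; -|\ln r|,\; |\ln r|),\\
p_x &= \tfrac{1}{2}(x_1 + x_2), \qquad p_w = x_2 - x_1 + 1 \quad (\text{from rois}),\\
g_x &= p_x + p_w d_x, \qquad g_w = p_w e^{d_w},\\
x_1' &= g_x - \tfrac{1}{2}g_w + \tfrac{1}{2}, \qquad x_2' = g_x + \tfrac{1}{2}g_w - \tfrac{1}{2},
\end{aligned}
\]

with the result clamped to \([0, \text{max\_width}]\) for x and \([0, \text{max\_height}]\) for y.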
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_DECODE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_DECODE_IMPL_H_
#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void BoundingBoxDecode(const size_t size, const T *rois, const T *deltas, T *bboxes, const float &m1, const float &m2,
                       const float &m3, const float &m4, const float &s1, const float &s2, const float &s3,
                       const float &s4, const int &max_height, const int &max_width, const float &ratio_clip,
                       cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_DECODE_IMPL_H_
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cu
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh"
template <typename T>
__global__ void BoundingBoxEncodeKernel(const size_t size, const T *anchor_box, const T *groundtruth_box, T *deltas,
                                        const float m1, const float m2, const float m3, const float m4, const float s1,
                                        const float s2, const float s3, const float s4) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {
    const size_t left_x = i * 4;
    const size_t left_y = i * 4 + 1;
    const size_t right_x = i * 4 + 2;
    const size_t right_y = i * 4 + 3;

    T px = (anchor_box[left_x] + anchor_box[right_x]) * 0.5f;
    T py = (anchor_box[left_y] + anchor_box[right_y]) * 0.5f;
    T pw = anchor_box[right_x] - anchor_box[left_x] + 1.0f;
    T ph = anchor_box[right_y] - anchor_box[left_y] + 1.0f;

    T gx = (groundtruth_box[left_x] + groundtruth_box[right_x]) * 0.5f;
    T gy = (groundtruth_box[left_y] + groundtruth_box[right_y]) * 0.5f;
    T gw = groundtruth_box[right_x] - groundtruth_box[left_x] + 1.0f;
    T gh = groundtruth_box[right_y] - groundtruth_box[left_y] + 1.0f;

    T dx = (gx - px) / pw;
    T dy = (gy - py) / ph;
    T dw = log(gw / pw);
    T dh = log(gh / ph);

    deltas[left_x] = (dx - m1) / s1;
    deltas[left_y] = (dy - m2) / s2;
    deltas[right_x] = (dw - m3) / s3;
    deltas[right_y] = (dh - m4) / s4;
  }
}

template <typename T>
void BoundingBoxEncode(const size_t size, const T *anchor_box, const T *groundtruth_box, T *deltas, const float &m1,
                       const float &m2, const float &m3, const float &m4, const float &s1, const float &s2,
                       const float &s3, const float &s4, cudaStream_t cuda_stream) {
  BoundingBoxEncodeKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, anchor_box, groundtruth_box, deltas,
                                                                             m1, m2, m3, m4, s1, s2, s3, s4);
}

template void BoundingBoxEncode<float>(const size_t size, const float *anchor_box, const float *groundtruth_box,
                                       float *deltas, const float &m1, const float &m2, const float &m3,
                                       const float &m4, const float &s1, const float &s2, const float &s3,
                                       const float &s4, cudaStream_t cuda_stream);
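Note (not part of the diff): BoundingBoxEncodeKernel is the inverse transform, turning an anchor / ground-truth pair into normalized deltas:

\[
\begin{aligned}
p_x &= \tfrac{1}{2}(x_1^{a} + x_2^{a}), \quad p_w = x_2^{a} - x_1^{a} + 1, \qquad
g_x = \tfrac{1}{2}(x_1^{gt} + x_2^{gt}), \quad g_w = x_2^{gt} - x_1^{gt} + 1,\\
\delta_x &= \frac{(g_x - p_x)/p_w - m_1}{s_1}, \qquad
\delta_w = \frac{\ln(g_w / p_w) - m_3}{s_3} \qquad (\text{and analogously } \delta_y, \delta_h).
\end{aligned}
\]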
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_ENCODE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_ENCODE_IMPL_H_
#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void BoundingBoxEncode(const size_t size, const T *anchor_box, const T *groundtruth_box, T *deltas, const float &m1,
                       const float &m2, const float &m3, const float &m4, const float &s1, const float &s2,
                       const float &s3, const float &s4, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_BOUNDINGBOX_ENCODE_IMPL_H_
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu
@@ -69,6 +69,25 @@ struct AddFunc {
  __device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return (lhs + rhs); }
};

template <typename T, typename S>
struct FloorDivFunc {
  __device__ __forceinline__ S operator()(const T &lhs, const T &rhs) { return floor(static_cast<float>(lhs / rhs)); }
};

template <>
struct FloorDivFunc<half, half> {
  __device__ __forceinline__ half operator()(const half &lhs, const half &rhs) {
    return __float2half(floor(__half2float(lhs) / __half2float(rhs)));
  }
};

template <>
struct FloorDivFunc<half, bool> {
  // invalid branch
  __device__ __forceinline__ half operator()(const half &lhs, const half &rhs) { return false; }
};

template <>
struct PowerFunc<half, bool> {
  // invalid branch
@@ -77,6 +96,7 @@ struct PowerFunc<half, bool> {
__device__ __forceinline__ int Index(const int &index, const int &dim) { return dim == 1 ? 0 : index; }

template <typename T, typename S, typename Func>
__device__ __forceinline__ void BroadcastOperator(const int &l0, const int &l1, const int &l2, const int &l3,
                                                  const int &r0, const int &r1, const int &r2, const int &r3,
@@ -116,16 +136,19 @@ __global__ void BroadcastKernel(const int l0, const int l1, const int l2, const
                                                       output);
    case BROADCAST_TYPE_REALDIV:
      return BroadcastOperator<T, S, RealDivFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
                                                        output);
    case BROADCAST_TYPE_MUL:
      return BroadcastOperator<T, S, MulFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
                                                    output);
    case BROADCAST_TYPE_SUB:
      return BroadcastOperator<T, S, SubFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
                                                    output);
    case BROADCAST_TYPE_ADD:
      return BroadcastOperator<T, S, AddFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
                                                    output);
    case BROADCAST_TYPE_FLOORDIV:
      return BroadcastOperator<T, S, FloorDivFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0,
                                                         input1, output);
  }
}
@@ -167,6 +190,8 @@ __global__ void NoBroadcastKernel(const int nums, enum BroadcastOpType op, const
      return NoBroadcastOperator<T, S, SubFunc<T, S>>(nums, input0, input1, output);
    case BROADCAST_TYPE_ADD:
      return NoBroadcastOperator<T, S, AddFunc<T, S>>(nums, input0, input1, output);
    case BROADCAST_TYPE_FLOORDIV:
      return NoBroadcastOperator<T, S, FloorDivFunc<T, S>>(nums, input0, input1, output);
  }
}
@@ -195,7 +220,7 @@ void BroadcastTo(const int &i0, const int &i1, const int &i2, const int &i3, con
                 const int &o2, const int &o3, const T *input_addr, T *output_addr, cudaStream_t stream) {
  int nums = o0 * o1 * o2 * o3;
  BroadcastToKernel<<<GET_BLOCKS(nums), GET_THREADS, 0, stream>>>(i0, i1, i2, i3, o0, o1, o2, o3, input_addr,
                                                                  output_addr);
}

template void Broadcast(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
@@ -226,9 +251,8 @@ template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half *
                          bool *output, cudaStream_t stream);
template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half *input0, const half *input1,
                          half *output, cudaStream_t stream);
template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1, int *output,
                          cudaStream_t stream);
template void BroadcastTo(const int &i0, const int &i1, const int &i2, const int &i3, const int &o0, const int &o1,
                          const int &o2, const int &o3, const float *input_addr, float *output_addr,
                          cudaStream_t stream);
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh
@@ -29,6 +29,7 @@ enum BroadcastOpType {
  BROADCAST_TYPE_MUL = 6,
  BROADCAST_TYPE_SUB = 7,
  BROADCAST_TYPE_ADD = 8,
  BROADCAST_TYPE_FLOORDIV = 9,
  BROADCAST_TYPE_INVALID = 0xffffffff,
};
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cu
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/gathernd.cuh"
#include "runtime/device/gpu/cuda_common.h"
template <typename T, typename S>
__global__ void GatherNdKernel(T *input, S *indices, T *output, const size_t output_dim0, const size_t output_dim1,
                               const size_t indices_dim1, S *batch_indices, S *batch_strides) {
  int num = output_dim0 * output_dim1;
  int i, j;
  for (int write_index = blockIdx.x * blockDim.x + threadIdx.x; write_index < num;
       write_index += blockDim.x * gridDim.x) {
    i = write_index / output_dim1 % output_dim0;
    j = write_index % output_dim1;

    bool out_of_bound = false;
    int read_index = 0;
    int indices_i = 0;
    for (size_t k = 0; k < indices_dim1; k++) {
      size_t ind = indices_dim1 * i + k;
      indices_i = indices[ind];
      out_of_bound |= !(indices_i < batch_indices[k]);
      read_index += indices_i * batch_strides[k];
    }
    read_index += j;

    if (!out_of_bound) {
      output[write_index] = input[read_index];
    } else {
      output[write_index] = 0;
    }
  }
  return;
}

template <typename T, typename S>
void GatherNd(T *input, S *indices, T *output, const size_t &output_dim0, const size_t &output_dim1,
              const size_t &indices_dim1, S *batch_indices, S *batch_strides, cudaStream_t stream) {
  int size = output_dim0 * output_dim1;
  GatherNdKernel<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(input, indices, output, output_dim0, output_dim1,
                                                               indices_dim1, batch_indices, batch_strides);
  return;
}

template void GatherNd<float, int>(float *input, int *indices, float *output, const size_t &output_dim0,
                                   const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
                                   int *batch_strides, cudaStream_t stream);
template void GatherNd<half, int>(half *input, int *indices, half *output, const size_t &output_dim0,
                                  const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
                                  int *batch_strides, cudaStream_t stream);
template void GatherNd<int, int>(int *input, int *indices, int *output, const size_t &output_dim0,
                                 const size_t &output_dim1, const size_t &indices_dim1, int *batch_indices,
                                 int *batch_strides, cudaStream_t stream);
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gathernd.cuh
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_GATHERND_GPU_CU_H
#define MINDSPORE_GATHERND_GPU_CU_H
#include "runtime/device/gpu/cuda_common.h"
template <typename T, typename S>
void GatherNd(T *input, S *indices, T *output, const size_t &output_dim0, const size_t &output_dim1,
              const size_t &indices_dim1, S *batch_indices, S *batch_strides, cudaStream_t stream);
#endif // MINDSPORE_GATHERND_GPU_CU_H
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cu
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh"
#include "runtime/device/gpu/cuda_common.h"
template <typename T, typename S>
__global__ void ScatterNdKernel(S *indices, T *update, T *output, const size_t block_size, const size_t input_size,
                                const size_t output_size, const size_t indices_dim_0, const size_t indices_dim_1,
                                S *indices_stride, S *work_shape) {
  int i, j;
  for (int read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < input_size;
       read_index += blockDim.x * gridDim.x) {
    int write_index = 0;
    bool out_bound = false;

    i = read_index / block_size;
    j = read_index % block_size;

    for (size_t k = 0; k < indices_dim_1; k++) {
      S indices_i = indices[i * indices_dim_1 + k];
      out_bound |= indices_i >= work_shape[k];
      write_index += indices_i * indices_stride[k];
    }

    write_index += j;
    out_bound |= write_index >= output_size;

    if (!out_bound) {
      output[write_index] = update[read_index];
    }
  }
}

template <typename T, typename S>
void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const size_t &input_size,
               const size_t &output_size, const size_t &indices_dim_0, const size_t &indices_dim_1, S *indices_stride,
               S *work_shape, cudaStream_t stream) {
  ScatterNdKernel<<<GET_BLOCKS(input_size), GET_THREADS, 0, stream>>>(indices, update, output, block_size, input_size,
                                                                      output_size, indices_dim_0, indices_dim_1,
                                                                      indices_stride, work_shape);
  return;
}

template void ScatterNd<float, int>(int *indices, float *update, float *output, const size_t &block_size,
                                    const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                    const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                    cudaStream_t stream);
template void ScatterNd<half, int>(int *indices, half *update, half *output, const size_t &block_size,
                                   const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                   const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                   cudaStream_t stream);
template void ScatterNd<int, int>(int *indices, int *update, int *output, const size_t &block_size,
                                  const size_t &input_size, const size_t &output_size, const size_t &indices_dim_0,
                                  const size_t &indices_dim_1, int *indices_stride, int *work_shape,
                                  cudaStream_t stream);
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd.cuh
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_SCATTER_ND_GPU_CU_H
#define MINDSPORE_SCATTER_ND_GPU_CU_H
#include "runtime/device/gpu/cuda_common.h"
template <typename T, typename S>
void ScatterNd(S *indices, T *update, T *output, const size_t &block_size, const size_t &input_size,
               const size_t &output_size, const size_t &indices_dim_0, const size_t &indices_dim_1, S *indices_stride,
               S *work_shape, cudaStream_t stream);
#endif // MINDSPORE_SCATTER_ND_GPU_CU_H
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cu
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cuh"
template <typename T>
__global__ void SGDKernel(const int size, const T dampening, const T weight_decay, const bool nesterov, const T *grad,
                          const T *momentum, const T *lr, T *param, T *accum, T *stat) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (size); i += blockDim.x * gridDim.x) {
    T grad_new = grad[i];
    if (weight_decay != static_cast<T>(0)) {
      grad_new += param[i] * weight_decay;
    }

    if (momentum[0] != static_cast<T>(0)) {
      if (stat[i] == static_cast<T>(0)) {
        accum[i] = grad_new;
        stat[i] = 0;
      } else {
        accum[i] = accum[i] * momentum[0] + (1.0 - dampening) * grad_new;
      }

      if (nesterov) {
        grad_new += accum[i] * momentum[0];
      } else {
        grad_new = accum[i];
      }
    }

    param[i] -= lr[0] * grad_new;
  }
}

template <typename T>
void SGD(const int size, const T dampening, const T weight_decay, const bool nesterov, const T *lr, const T *momentum,
         const T *grad, T *param, T *accum, T *stat, cudaStream_t cuda_stream) {
  SGDKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, dampening, weight_decay, nesterov, grad, momentum,
                                                               lr, param, accum, stat);
}

template void SGD(const int size, const float dampening, const float weight_decay, const bool nesterov,
                  const float *lr, const float *momentum, const float *grad, float *param, float *accum, float *stat,
                  cudaStream_t cuda_stream);
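Note (not part of the diff): in update-rule form, SGDKernel computes per element, with \(w\) = param, \(g\) = grad, \(v\) = accum, \(\lambda\) = weight_decay, \(\mu\) = momentum[0], and \(\tau\) = dampening:

\[
\begin{aligned}
g &\leftarrow g + \lambda\,w \quad (\text{if } \lambda \neq 0),\\
v &\leftarrow \begin{cases} g & \text{if } stat = 0\\ \mu\,v + (1-\tau)\,g & \text{otherwise}\end{cases} \quad (\text{only if } \mu \neq 0),\\
g &\leftarrow \begin{cases} g + \mu\,v & \text{nesterov}\\ v & \text{otherwise,}\end{cases}\\
w &\leftarrow w - \text{lr}\cdot g.
\end{aligned}
\]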
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cuh
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SGD_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SGD_IMPL_H_
#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void SGD(const int size, const T dampening, const T weight_decay, const bool nesterov, const T *lr, const T *momentum,
         const T *grad, T *param, T *accum, T *stat, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SGD_IMPL_H_
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc
@@ -51,6 +51,10 @@ MS_REG_GPU_KERNEL_TWO(
  TensorAdd,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  BroadcastOpGpuKernel, float, float)
MS_REG_GPU_KERNEL_TWO(
  FloorDiv,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  BroadcastOpGpuKernel, float, float)

// fp16
MS_REG_GPU_KERNEL_TWO(
@@ -85,6 +89,10 @@ MS_REG_GPU_KERNEL_TWO(
  TensorAdd,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  BroadcastOpGpuKernel, half, half)
MS_REG_GPU_KERNEL_TWO(
  FloorDiv,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  BroadcastOpGpuKernel, half, half)

// int32
MS_REG_GPU_KERNEL_TWO(
mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h
@@ -96,10 +96,10 @@ class BroadcastOpGpuKernel : public GpuKernel {
     std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
     static std::map<std::string, BroadcastOpType> kBroadcastTypeMap = {
-      {"Greater", BROADCAST_TYPE_GREATER},  {"Less", BROADCAST_TYPE_LESS},  {"Maximum", BROADCAST_TYPE_MAXIMUM},
-      {"Minimum", BROADCAST_TYPE_MINIMUM},  {"Pow", BROADCAST_TYPE_POWER},  {"RealDiv", BROADCAST_TYPE_REALDIV},
-      {"FloorDiv", BROADCAST_TYPE_REALDIV}, {"Mul", BROADCAST_TYPE_MUL},    {"Sub", BROADCAST_TYPE_SUB},
-      {"TensorAdd", BROADCAST_TYPE_ADD},
+      {"Greater", BROADCAST_TYPE_GREATER}, {"Less", BROADCAST_TYPE_LESS},  {"Maximum", BROADCAST_TYPE_MAXIMUM},
+      {"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER},  {"RealDiv", BROADCAST_TYPE_REALDIV},
+      {"Mul", BROADCAST_TYPE_MUL},         {"Sub", BROADCAST_TYPE_SUB},    {"TensorAdd", BROADCAST_TYPE_ADD},
+      {"FloorDiv", BROADCAST_TYPE_FLOORDIV},
     };
     auto iter = kBroadcastTypeMap.find(kernel_name);
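Note (not part of the diff): together with the broadcast_impl.cu/.cuh changes above, FloorDiv follows a fixed wiring pattern: a new BroadcastOpType value, a FloorDivFunc device functor, a dispatch case in the broadcast kernels, the name-to-type map entry here, and the kernel registrations in broadcast_gpu_kernel.cc. A compressed, host-side sketch of that pattern (illustrative only; the names mirror the diff but the surrounding scaffolding is simplified):

// Illustrative sketch of the op-type wiring; not MindSpore code.
#include <cmath>
#include <iostream>
#include <map>
#include <string>

enum BroadcastOpType { BROADCAST_TYPE_ADD = 8, BROADCAST_TYPE_FLOORDIV = 9 };

template <typename T, typename S>
struct FloorDivFunc {
  S operator()(const T &lhs, const T &rhs) const { return std::floor(static_cast<float>(lhs / rhs)); }
};

template <typename T, typename S>
S Dispatch(BroadcastOpType op, const T &lhs, const T &rhs) {
  switch (op) {
    case BROADCAST_TYPE_FLOORDIV:
      return FloorDivFunc<T, S>()(lhs, rhs);
    default:
      return lhs + rhs;  // stand-in for the other op types
  }
}

int main() {
  // Kernel name -> op type, as in kBroadcastTypeMap.
  std::map<std::string, BroadcastOpType> kMap = {{"TensorAdd", BROADCAST_TYPE_ADD},
                                                 {"FloorDiv", BROADCAST_TYPE_FLOORDIV}};
  std::cout << Dispatch<float, float>(kMap["FloorDiv"], 7.0f, 2.0f) << std::endl;  // prints 3
  return 0;
}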
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.cc
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(SGD,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      SGDGpuKernel, float)
}  // namespace kernel
}  // namespace mindspore
mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sgd_gpu_kernel.h
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SGD_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SGD_KERNEL_H_
#include <vector>
#include "backend/kernel_compiler/gpu/cuda_impl/sgd_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
template <typename T>
class SGDGpuKernel : public GpuKernel {
 public:
  SGDGpuKernel() : size_(1), dampening_(0.0), weight_decay_(0.0), nesterov_(false) {}
  ~SGDGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream) override {
    T *param = GetDeviceAddress<T>(inputs, 0);
    T *grad = GetDeviceAddress<T>(inputs, 1);
    T *lr = GetDeviceAddress<T>(inputs, 2);
    T *accum = GetDeviceAddress<T>(inputs, 3);
    T *momentum = GetDeviceAddress<T>(inputs, 4);
    T *stat = GetDeviceAddress<T>(inputs, 5);

    SGD(size_, dampening_, weight_decay_, nesterov_, lr, momentum, grad, param, accum, stat,
        reinterpret_cast<cudaStream_t>(stream));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    dampening_ = GetAttr<float>(kernel_node, "dampening");
    weight_decay_ = GetAttr<float>(kernel_node, "weight_decay");
    nesterov_ = GetAttr<bool>(kernel_node, "nesterov");

    auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    for (auto &dim : input_shape) {
      size_ *= dim;
    }
    InitSizeLists();
    return true;
  }

 protected:
  void InitSizeLists() override {
    size_t input_size = size_ * sizeof(T);
    input_size_list_.push_back(input_size);  // parameter
    input_size_list_.push_back(input_size);  // gradient
    input_size_list_.push_back(sizeof(T));   // lr
    input_size_list_.push_back(input_size);  // accum
    input_size_list_.push_back(sizeof(T));   // momentum
    input_size_list_.push_back(input_size);  // stat
    output_size_list_.push_back(input_size);
  }

 private:
  size_t size_;
  float dampening_;
  float weight_decay_;
  bool nesterov_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SGD_KERNEL_H_
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.cc
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(
  BoundingBoxDecode,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  BoundingBoxDecodeGpuKernel, float)
}  // namespace kernel
}  // namespace mindspore
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_decode_gpu_kernel.h
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
#include <vector>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_decode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
template <typename T>
class BoundingBoxDecodeGpuKernel : public GpuKernel {
 public:
  BoundingBoxDecodeGpuKernel() : rois_size_(0), deltas_size_(0), bboxes_size_(0), wh_ratio_clip_(0.016) {}
  ~BoundingBoxDecodeGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    T *rois_addr = GetDeviceAddress<T>(inputs, 0);
    T *deltas_addr = GetDeviceAddress<T>(inputs, 1);
    T *bboxes_addr = GetDeviceAddress<T>(outputs, 0);

    if (inputs[0]->size != inputs[1]->size) {
      MS_LOG(ERROR) << "Rois box size must equal with deltas box size -" << inputs[1]->size << ", but got"
                    << inputs[0]->size;
      return false;
    }

    const size_t coordinate = 4;
    const size_t block_size = inputs[0]->size / sizeof(T);
    if ((block_size % coordinate) != 0) {
      MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
      return false;
    }

    BoundingBoxDecode(block_size / coordinate, rois_addr, deltas_addr, bboxes_addr, means_[0], means_[1], means_[2],
                      means_[3], stds_[0], stds_[1], stds_[2], stds_[3], max_shape_[0], max_shape_[1], wh_ratio_clip_,
                      reinterpret_cast<cudaStream_t>(stream_ptr));

    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
    if (input_num != 2) {
      MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxDecode needs 2 inputs.";
      return false;
    }
    rois_size_ = sizeof(T);
    deltas_size_ = sizeof(T);
    bboxes_size_ = sizeof(T);

    auto logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    for (size_t i = 0; i < logits_shape.size(); i++) {
      rois_size_ *= logits_shape[i];
    }

    auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    for (size_t i = 0; i < labels_shape.size(); i++) {
      deltas_size_ *= labels_shape[i];
    }

    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    for (size_t i = 0; i < output_shape.size(); i++) {
      bboxes_size_ *= output_shape[i];
    }

    InitSizeLists();

    const size_t coordinate_size = 4;
    if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueTuple>() ||
        AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
      means_ = GetAttr<std::vector<float>>(kernel_node, "means");
    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
      float mean = GetAttr<int>(kernel_node, "means");
      for (size_t i = 0; i < coordinate_size; i++) {
        means_.emplace_back(mean);
      }
    } else {
      MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
    }

    if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueTuple>() ||
        AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
      stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
      float std = GetAttr<int>(kernel_node, "stds");
      for (size_t i = 0; i < coordinate_size; i++) {
        stds_.emplace_back(std);
      }
    } else {
      MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
    }

    max_shape_ = GetAttr<std::vector<int>>(kernel_node, "max_shape");
    wh_ratio_clip_ = GetAttr<float>(kernel_node, "wh_ratio_clip");

    if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
      MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
    }
    if (max_shape_.size() < 2) {
      MS_LOG(EXCEPTION) << "The size of max_shape is less than 2.";
    }

    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(rois_size_);
    input_size_list_.push_back(deltas_size_);
    output_size_list_.push_back(bboxes_size_);
  }

 private:
  size_t rois_size_;
  size_t deltas_size_;
  size_t bboxes_size_;

  std::vector<float> means_;
  std::vector<float> stds_;
  std::vector<int> max_shape_;
  float wh_ratio_clip_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_DECODE_GPU_KERNEL_H
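
For reference, the delta-to-box transform that this kernel launches (BoundingBoxDecode in boundingbox_decode_impl.cu) follows the usual anchor-decoding convention: denormalize the deltas with means/stds, clamp the log-space width/height terms by wh_ratio_clip, shift and scale the roi centers, and clip the result to max_shape. Below is a minimal NumPy sketch of that convention; decode_reference is a hypothetical helper written only for illustration. It reproduces the expected values used in tests/st/ops/gpu/test_boundingbox_decode_op.py, but it is not the CUDA implementation itself.

import numpy as np

def decode_reference(rois, deltas, means, stds, max_shape, wh_ratio_clip=0.016):
    # Denormalize deltas with the per-coordinate means and stds.
    dx = deltas[:, 0] * stds[0] + means[0]
    dy = deltas[:, 1] * stds[1] + means[1]
    dw = deltas[:, 2] * stds[2] + means[2]
    dh = deltas[:, 3] * stds[3] + means[3]

    # Clamp the log-space width/height deltas by wh_ratio_clip.
    max_ratio = abs(np.log(wh_ratio_clip))
    dw = np.clip(dw, -max_ratio, max_ratio)
    dh = np.clip(dh, -max_ratio, max_ratio)

    # Center and size of the input rois ((x1, y1, x2, y2) convention).
    px = (rois[:, 0] + rois[:, 2]) * 0.5
    py = (rois[:, 1] + rois[:, 3]) * 0.5
    pw = rois[:, 2] - rois[:, 0] + 1.0
    ph = rois[:, 3] - rois[:, 1] + 1.0

    # Shift the centers and rescale the sizes.
    gx = px + pw * dx
    gy = py + ph * dy
    gw = pw * np.exp(dw)
    gh = ph * np.exp(dh)

    # Back to corners, clipped into the image given by max_shape = (h, w).
    x1 = np.clip(gx - gw * 0.5 + 0.5, 0, max_shape[1] - 1)
    y1 = np.clip(gy - gh * 0.5 + 0.5, 0, max_shape[0] - 1)
    x2 = np.clip(gx + gw * 0.5 - 0.5, 0, max_shape[1] - 1)
    y2 = np.clip(gy + gh * 0.5 - 0.5, 0, max_shape[0] - 1)
    return np.stack([x1, y1, x2, y2], axis=-1)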
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.cc
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(
  BoundingBoxEncode,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  BoundingBoxEncodeGpuKernel, float)
}  // namespace kernel
}  // namespace mindspore
mindspore/ccsrc/backend/kernel_compiler/gpu/other/boundingbox_encode_gpu_kernel.h
0 → 100644
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H
#include <vector>
#include "backend/kernel_compiler/gpu/cuda_impl/boundingbox_encode_impl.cuh"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
template <typename T>
class BoundingBoxEncodeGpuKernel : public GpuKernel {
 public:
  BoundingBoxEncodeGpuKernel() : anchor_size_(0), groundtruth_size_(0), deltas_size_(0) {}
  ~BoundingBoxEncodeGpuKernel() override = default;

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    T *anchor_addr = GetDeviceAddress<T>(inputs, 0);
    T *groundtruth_addr = GetDeviceAddress<T>(inputs, 1);
    T *deltas_addr = GetDeviceAddress<T>(outputs, 0);

    if (inputs[0]->size != inputs[1]->size) {
      MS_LOG(ERROR) << "Anchor box size must be equal to groundtruth box size: " << inputs[1]->size << ", but got "
                    << inputs[0]->size;
      return false;
    }

    const size_t coordinate = 4;
    const size_t block_size = inputs[0]->size / sizeof(T);
    if ((block_size % coordinate) != 0) {
      MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
      return false;
    }

    BoundingBoxEncode(block_size / coordinate, anchor_addr, groundtruth_addr, deltas_addr, means_[0], means_[1],
                      means_[2], means_[3], stds_[0], stds_[1], stds_[2], stds_[3],
                      reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

  bool Init(const CNodePtr &kernel_node) override {
    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
    if (input_num != 2) {
      MS_LOG(ERROR) << "Input number is " << input_num << ", but BoundingBoxEncode needs 2 inputs.";
      return false;
    }

    anchor_size_ = sizeof(T);
    groundtruth_size_ = sizeof(T);
    deltas_size_ = sizeof(T);

    auto anchor_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    for (size_t i = 0; i < anchor_shape.size(); i++) {
      anchor_size_ *= anchor_shape[i];
    }

    auto groundtruth_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    for (size_t i = 0; i < groundtruth_shape.size(); i++) {
      groundtruth_size_ *= groundtruth_shape[i];
    }

    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    for (size_t i = 0; i < output_shape.size(); i++) {
      deltas_size_ *= output_shape[i];
    }

    InitSizeLists();

    const size_t coordinate_size = 4;
    if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueTuple>() ||
        AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
      means_ = GetAttr<std::vector<float>>(kernel_node, "means");
    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
      float mean = GetAttr<float>(kernel_node, "means");
      for (size_t i = 0; i < coordinate_size; i++) {
        means_.emplace_back(mean);
      }
    } else {
      MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
    }

    if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueTuple>() ||
        AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
      stds_ = GetAttr<std::vector<float>>(kernel_node, "stds");
    } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
      float std = GetAttr<float>(kernel_node, "stds");
      for (size_t i = 0; i < coordinate_size; i++) {
        stds_.emplace_back(std);
      }
    } else {
      MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
    }

    if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
      MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
    }

    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(anchor_size_);
    input_size_list_.push_back(groundtruth_size_);
    output_size_list_.push_back(deltas_size_);
  }

 private:
  size_t anchor_size_;
  size_t groundtruth_size_;
  size_t deltas_size_;
  std::vector<float> means_;
  std::vector<float> stds_;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_OTHER_BOUNDINGBOX_ENCODE_GPU_KERNEL_H
tests/st/ops/gpu/test_boundingbox_decode_op.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class NetBoundingBoxDecode(nn.Cell):
    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxDecode, self).__init__()
        self.decode = P.BoundingBoxDecode(max_shape=(768, 1280), means=means, stds=stds,
                                          wh_ratio_clip=0.016)

    def construct(self, anchor, groundtruth):
        return self.decode(anchor, groundtruth)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_boundingbox_decode():
    anchor = np.array([[4, 1, 2, 1], [2, 2, 2, 3]], np.float32)
    deltas = np.array([[3, 1, 2, 2], [1, 2, 1, 4]], np.float32)
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor_box = Tensor(anchor, mindspore.float32)
    deltas_box = Tensor(deltas, mindspore.float32)
    expect_deltas = np.array([[28.6500, 0.0000, 0.0000, 33.8500],
                              [0.0000, 0.0000, 15.8663, 72.7000]], np.float32)

    error = np.ones(shape=[2, 4]) * 1.0e-4

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    boundingbox_decode = NetBoundingBoxDecode(means, stds)
    output = boundingbox_decode(anchor_box, deltas_box)
    diff = output.asnumpy() - expect_deltas
    assert np.all(abs(diff) < error)

    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
    boundingbox_decode = NetBoundingBoxDecode(means, stds)
    output = boundingbox_decode(anchor_box, deltas_box)
    diff = output.asnumpy() - expect_deltas
    assert np.all(abs(diff) < error)
tests/st/ops/gpu/test_boundingbox_encode_op.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class NetBoundingBoxEncode(nn.Cell):
    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxEncode, self).__init__()
        self.encode = P.BoundingBoxEncode(means=means, stds=stds)

    def construct(self, anchor, groundtruth):
        return self.encode(anchor, groundtruth)


def bbox2delta(proposals, gt, means, stds):
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0] + 1.0
    ph = proposals[..., 3] - proposals[..., 1] + 1.0

    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0] + 1.0
    gh = gt[..., 3] - gt[..., 1] + 1.0

    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = np.log(gw / pw)
    dh = np.log(gh / ph)

    means = np.array(means, np.float32)
    stds = np.array(stds, np.float32)
    deltas = np.stack([(dx - means[0]) / stds[0],
                       (dy - means[1]) / stds[1],
                       (dw - means[2]) / stds[2],
                       (dh - means[3]) / stds[3]], axis=-1)
    return deltas


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_boundingbox_encode():
    anchor = np.array([[4, 1, 6, 9], [2, 5, 5, 9]]).astype(np.float32)
    gt = np.array([[3, 2, 7, 7], [1, 5, 5, 8]]).astype(np.float32)
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor_box = Tensor(anchor, mindspore.float32)
    groundtruth_box = Tensor(gt, mindspore.float32)
    expect_deltas = bbox2delta(anchor, gt, means, stds)

    error = np.ones(shape=[2, 4]) * 1.0e-6

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    boundingbox_encode = NetBoundingBoxEncode(means, stds)
    output = boundingbox_encode(anchor_box, groundtruth_box)
    diff = output.asnumpy() - expect_deltas
    assert np.all(abs(diff) < error)

    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
    boundingbox_encode = NetBoundingBoxEncode(means, stds)
    output = boundingbox_encode(anchor_box, groundtruth_box)
    diff = output.asnumpy() - expect_deltas
    assert np.all(abs(diff) < error)
tests/st/ops/gpu/test_floordiv_op.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P


class NetFloorDiv(nn.Cell):
    def __init__(self):
        super(NetFloorDiv, self).__init__()
        self.floordiv = P.FloorDiv()

    def construct(self, x, y):
        return self.floordiv(x, y)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_floor_div():
    x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
    y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
    x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
    y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
    x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
    x3_np = np.random.randint(1, 5, 1).astype(np.float32)
    y3_np = np.random.randint(1, 5, 1).astype(np.float32)
    x4_np = np.array(768).astype(np.float32)
    y4_np = np.array(3072.5).astype(np.float32)
    x5_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float16)
    y5_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float16)
    x6_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.int32)
    y6_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.int32)

    x0 = Tensor(x0_np)
    y0 = Tensor(y0_np)
    x1 = Tensor(x1_np)
    y1 = Tensor(y1_np)
    x2 = Tensor(x2_np)
    y2 = Tensor(y2_np)
    x3 = Tensor(x3_np)
    y3 = Tensor(y3_np)
    x4 = Tensor(x4_np)
    y4 = Tensor(y4_np)
    x5 = Tensor(x5_np)
    y5 = Tensor(y5_np)
    x6 = Tensor(x6_np)
    y6 = Tensor(y6_np)

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    floor_div = NetFloorDiv()

    output0 = floor_div(x0, y0)
    expect0 = np.floor_divide(x0_np, y0_np)
    diff0 = output0.asnumpy() - expect0
    error0 = np.ones(shape=expect0.shape) * 1.0e-5
    assert np.all(diff0 < error0)
    assert output0.shape == expect0.shape

    output1 = floor_div(x1, y1)
    expect1 = np.floor_divide(x1_np, y1_np)
    diff1 = output1.asnumpy() - expect1
    error1 = np.ones(shape=expect1.shape) * 1.0e-5
    assert np.all(diff1 < error1)
    assert output1.shape == expect1.shape

    output2 = floor_div(x2, y2)
    expect2 = np.floor_divide(x2_np, y2_np)
    diff2 = output2.asnumpy() - expect2
    error2 = np.ones(shape=expect2.shape) * 1.0e-5
    assert np.all(diff2 < error2)
    assert output2.shape == expect2.shape

    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

    output3 = floor_div(x3, y3)
    expect3 = np.floor_divide(x3_np, y3_np)
    diff3 = output3.asnumpy() - expect3
    error3 = np.ones(shape=expect3.shape) * 1.0e-5
    assert np.all(diff3 < error3)
    assert output3.shape == expect3.shape

    output4 = floor_div(x4, y4)
    expect4 = np.floor_divide(x4_np, y4_np)
    diff4 = output4.asnumpy() - expect4
    error4 = np.ones(shape=expect4.shape) * 1.0e-5
    assert np.all(diff4 < error4)
    assert output4.shape == expect4.shape

    output5 = floor_div(x5, y5)
    expect5 = np.floor_divide(x5_np, y5_np)
    diff5 = output5.asnumpy() - expect5
    error5 = np.ones(shape=expect5.shape) * 1.0e-5
    assert np.all(diff5 < error5)
    assert output5.shape == expect5.shape

    output6 = floor_div(x6, y6)
    expect6 = np.floor_divide(x6_np, y6_np)
    diff6 = output6.asnumpy() - expect6
    error6 = np.ones(shape=expect6.shape) * 1.0e-5
    assert np.all(diff6 < error6)
    assert output6.shape == expect6.shape
tests/st/ops/gpu/test_gathernd_op.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

from mindspore import Tensor
from mindspore.ops import operations as P
import mindspore.nn as nn
import mindspore.context as context


class GatherNdNet(nn.Cell):
    def __init__(self):
        super(GatherNdNet, self).__init__()
        self.gathernd = P.GatherNd()

    def construct(self, x, indices):
        return self.gathernd(x, indices)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gathernd0():
    x = Tensor(np.arange(3 * 2, dtype=np.float32).reshape(3, 2))
    indices = Tensor(np.array([[1, 1], [0, 1]]).astype(np.int32))
    expect = np.array([3., 1.])

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    gathernd = GatherNdNet()
    output = gathernd(x, indices)

    error = np.ones(shape=output.asnumpy().shape) * 1.0e-6
    diff = output.asnumpy() - expect
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gathernd1():
    x = Tensor(np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape(2, 3, 4, 5))
    indices = Tensor(np.array([[[[[l, k, j, i] for i in [1, 3, 4]] for j in range(4)]
                                for k in range(3)] for l in range(2)], dtype='i4'))
    expect = np.array([[[[1., 3., 4.], [6., 8., 9.], [11., 13., 14.], [16., 18., 19.]],
                        [[21., 23., 24.], [26., 28., 29.], [31., 33., 34.], [36., 38., 39.]],
                        [[41., 43., 44.], [46., 48., 49.], [51., 53., 54.], [56., 58., 59.]]],
                       [[[61., 63., 64.], [66., 68., 69.], [71., 73., 74.], [76., 78., 79.]],
                        [[81., 83., 84.], [86., 88., 89.], [91., 93., 94.], [96., 98., 99.]],
                        [[101., 103., 104.], [106., 108., 109.], [111., 113., 114.], [116., 118., 119.]]]])

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    gather = GatherNdNet()
    output = gather(x, indices)

    error = np.ones(shape=output.asnumpy().shape) * 1.0e-6
    diff = output.asnumpy() - expect
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gathernd2():
    x = Tensor(np.array([[4., 5., 4., 1., 5.],
                         [4., 9., 5., 6., 4.],
                         [9., 8., 4., 3., 6.],
                         [0., 4., 2., 2., 8.],
                         [1., 8., 6., 2., 8.],
                         [8., 1., 9., 7., 3.],
                         [7., 9., 2., 5., 7.],
                         [9., 8., 6., 8., 5.],
                         [3., 7., 2., 7., 4.],
                         [4., 2., 8., 2., 9.]]).astype(np.float16))
    indices = Tensor(np.array([[4000], [1], [300000]]).astype(np.int32))
    expect = np.array([[0., 0., 0., 0., 0.],
                       [4., 9., 5., 6., 4.],
                       [0., 0., 0., 0., 0.]])

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    gathernd = GatherNdNet()
    output = gathernd(x, indices)

    error = np.ones(shape=output.asnumpy().shape) * 1.0e-6
    diff = output.asnumpy() - expect
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_gathernd3():
    x = Tensor(np.array([[4, 5, 4, 1, 5],
                         [4, 9, 5, 6, 4],
                         [9, 8, 4, 3, 6],
                         [0, 4, 2, 2, 8],
                         [1, 8, 6, 2, 8],
                         [8, 1, 9, 7, 3],
                         [7, 9, 2, 5, 7],
                         [9, 8, 6, 8, 5],
                         [3, 7, 2, 7, 4],
                         [4, 2, 8, 2, 9]]).astype(np.int32))
    indices = Tensor(np.array([[4000], [1], [300000]]).astype(np.int32))
    expect = np.array([[0, 0, 0, 0, 0],
                       [4, 9, 5, 6, 4],
                       [0, 0, 0, 0, 0]])

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    gathernd = GatherNdNet()
    output = gathernd(x, indices)

    error = np.ones(shape=output.asnumpy().shape) * 1.0e-6
    diff = output.asnumpy() - expect
    assert np.all(diff < error)
    assert np.all(-diff < error)
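
In NumPy terms, GatherNd picks the element or slice x[i0, ..., ik-1] for each index vector along the last axis of indices. Below is a minimal sketch of that in-range behaviour; gathernd_reference is a hypothetical helper written only for illustration, and unlike the GPU kernel exercised above it does not return zeros for out-of-range index vectors.

import numpy as np

def gathernd_reference(x, indices):
    # Split the last axis of `indices` into one index array per dimension of x,
    # then use NumPy advanced indexing to gather the addressed elements/slices.
    idx = tuple(np.moveaxis(indices, -1, 0))
    return x[idx]

# Example matching test_gathernd0: gathers x[1, 1] and x[0, 1].
# x = np.arange(6, dtype=np.float32).reshape(3, 2)
# gathernd_reference(x, np.array([[1, 1], [0, 1]]))  -> array([3., 1.])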
tests/st/ops/gpu/test_scatter_nd.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class Net(nn.Cell):
    def __init__(self, _shape):
        super(Net, self).__init__()
        self.shape = _shape
        self.scatternd = P.ScatterNd()

    def construct(self, indices, update):
        return self.scatternd(indices, update, self.shape)


def scatternd_net(indices, update, _shape, expect):
    scatternd = Net(_shape)
    output = scatternd(Tensor(indices), Tensor(update))

    error = np.ones(shape=output.asnumpy().shape) * 1.0e-6
    diff = output.asnumpy() - expect
    assert np.all(diff < error)
    assert np.all(-diff < error)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatternd():
    arr_indices = np.array([[0, 1], [1, 1]]).astype(np.int32)
    arr_update = np.array([3.2, 1.1]).astype(np.float32)
    shape = (2, 2)
    expect = np.array([[0., 3.2],
                       [0., 1.1]])
    scatternd_net(arr_indices, arr_update, shape, expect)
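
ScatterNd is the scatter counterpart of GatherNd: it starts from an all-zero tensor of the requested shape and writes each update at the position named by its index vector, accumulating duplicates. A minimal NumPy sketch follows; scatternd_reference is a hypothetical helper for illustration only, not the kernel in scatter_nd.cu.

import numpy as np

def scatternd_reference(indices, updates, shape):
    # Build a zero tensor and accumulate `updates` at the scattered positions;
    # np.add.at sums contributions when the same index appears more than once.
    out = np.zeros(shape, dtype=updates.dtype)
    np.add.at(out, tuple(np.moveaxis(indices, -1, 0)), updates)
    return out

# Example matching test_scatternd:
# scatternd_reference(np.array([[0, 1], [1, 1]]), np.array([3.2, 1.1], np.float32), (2, 2))
# -> array([[0. , 3.2], [0. , 1.1]], dtype=float32)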
tests/st/ops/gpu/test_sgd_op.py
0 → 100644
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.nn import Dense
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim import SGD
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class NetSGD(nn.Cell):
    def __init__(self):
        super(NetSGD, self).__init__()
        self.batch_size = 1
        self.reshape = P.Reshape()
        weight = Tensor(np.ones([10, 16]).astype(np.float32) * 0.01)
        self.fc1 = Dense(16, 10, weight_init=weight)

    def construct(self, input_x):
        output = self.reshape(input_x, (self.batch_size, -1))
        output = self.fc1(output)
        return output


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_SGD():
    epoch = 3
    net = NetSGD()
    learning_rate = 0.1
    momentum = 0.9
    dampening = 0.0
    weight_decay = 0.0
    nesterov = True
    loss_scale = 1.0

    optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening,
                    weight_decay, nesterov, loss_scale)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
    train_network.set_train()

    losses = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss.asnumpy())

    last_loss = 100.0
    for loss in losses:
        assert last_loss > loss
        last_loss = loss
    return losses
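
For orientation, the SGD optimizer exercised above applies a momentum update with optional dampening and a Nesterov correction. The sketch below is only the conventional textbook formula, assuming PyTorch-style semantics; the exact per-element behaviour of the new GPU kernel lives in sgd_impl.cu and may differ in details such as first-step dampening or loss-scale handling, so treat sgd_step as an illustrative assumption rather than the kernel's definition.

import numpy as np

def sgd_step(param, grad, accum, lr=0.1, momentum=0.9, dampening=0.0, nesterov=True):
    # Conventional SGD-with-momentum step (illustrative only):
    # update the momentum buffer, optionally apply the Nesterov look-ahead,
    # then move the parameter against the gradient direction.
    accum = momentum * accum + (1.0 - dampening) * grad
    update = grad + momentum * accum if nesterov else accum
    return param - lr * update, accum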