Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
bb2777a1
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bb2777a1
编写于
6月 09, 2020
作者:
D
dengwentao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cpu op sub
上级
54991615
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
39 addition
and
28 deletion
+39
-28
mindspore/ccsrc/kernel/CMakeLists.txt
mindspore/ccsrc/kernel/CMakeLists.txt
+1
-1
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
+15
-11
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc
+16
-8
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h
+7
-8
未找到文件。
mindspore/ccsrc/kernel/CMakeLists.txt
浏览文件 @
bb2777a1
...
...
@@ -27,7 +27,7 @@ if (ENABLE_CPU)
list
(
REMOVE_ITEM CPU_SRC_LIST
"cpu/reduce_scatter_cpu_kernel.cc"
)
list
(
REMOVE_ITEM CPU_SRC_LIST
"cpu/embedding_look_up_comm_grad_cpu_kernel.cc"
)
list
(
REMOVE_ITEM CPU_SRC_LIST
"cpu/embedding_look_up_cpu_kernel.cc"
)
list
(
REMOVE_ITEM CPU_SRC_LIST
"cpu/sub
scalar
_cpu_kernel.cc"
)
list
(
REMOVE_ITEM CPU_SRC_LIST
"cpu/sub_cpu_kernel.cc"
)
endif
()
endif
()
...
...
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
浏览文件 @
bb2777a1
...
...
@@ -48,19 +48,14 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
auto
output_type
=
AnfAlgo
::
GetOutputInferDataType
(
kernel_node
,
0
);
MS_LOG
(
DEBUG
)
<<
"output type: "
<<
output_type
;
int
axis
=
AnfAlgo
::
GetNodeAttr
<
int
>
(
kernel_node
,
"axis"
);
MS_LOG
(
DEBUG
)
<<
"axis: "
<<
axis
;
if
(
axis_
<
0
)
{
axis
=
axis
+
SizeToInt
(
input_shape_
.
size
());
}
axis_
=
4
-
input_shape_
.
size
()
+
axis
;
axis_
=
4
-
input_shape_
.
size
();
MS_LOG
(
DEBUG
)
<<
"axis_: "
<<
axis_
;
reduce_scatter_flag_
=
AnfAlgo
::
GetNodeAttr
<
bool
>
(
kernel_node
,
"reduce_scatter_flag"
);
MS_LOG
(
DEBUG
)
<<
"reduce_scatter_flag: "
<<
reduce_scatter_flag_
;
if
(
reduce_scatter_flag_
)
{
size_t
gatherv2_out_lens
=
1
;
for
(
int
i
=
0
;
i
<
SizeToInt
(
input_shape_
.
size
());
i
++
)
{
if
(
i
==
axis
)
{
if
(
i
==
0
)
{
for
(
int
j
=
0
;
j
<
SizeToInt
(
indices_shape_
.
size
());
j
++
)
{
MS_LOG
(
DEBUG
)
<<
"gatherv2 out shape: "
<<
indices_shape_
[
j
];
gatherv2_out_lens
=
gatherv2_out_lens
*
indices_shape_
[
j
];
...
...
@@ -76,7 +71,10 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
if
(
gather_v2_out_
==
nullptr
)
{
MS_LOG
(
EXCEPTION
)
<<
"EmbeddingLookUpCPUKernel malloc failed, malloc lens: "
<<
gatherv2_out_lens_
;
}
memset_s
(
gather_v2_out_
,
gatherv2_out_lens_
,
0
,
gatherv2_out_lens_
);
auto
ret
=
memset_s
(
gather_v2_out_
,
gatherv2_out_lens_
,
0
,
gatherv2_out_lens_
);
if
(
ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"EmbeddingLookUpCPUKernel memset gatherv2 out buff failed"
;
}
split_num_
=
AnfAlgo
::
GetNodeAttr
<
int
>
(
kernel_node
,
"split_num"
);
MS_LOG
(
DEBUG
)
<<
"split_num: "
<<
split_num_
;
...
...
@@ -99,6 +97,12 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
auto
output_addr
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
addr
);
MS_LOG
(
DEBUG
)
<<
"output addr: "
<<
output_addr
<<
"output size: "
<<
outputs
[
0
]
->
size
;
float
*
gather_out_addr
=
reduce_scatter_flag_
?
reinterpret_cast
<
float
*>
(
gather_v2_out_
)
:
output_addr
;
if
(
!
reduce_scatter_flag_
)
{
auto
ret
=
memset_s
(
gather_out_addr
,
outputs
[
0
]
->
size
,
0
,
outputs
[
0
]
->
size
);
if
(
ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"EmbeddingLookUpCPUKernel memset out buff failed"
;
}
}
MS_LOG
(
DEBUG
)
<<
"gatherv2 out addr: "
<<
gather_out_addr
;
size_t
dim0
=
input_shape_
[
0
];
size_t
dim1
=
input_shape_
[
1
];
...
...
@@ -149,10 +153,10 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
return
true
;
}
void
memcpy_task
(
std
::
vector
<
float
*>
mem_dest_addr_list
,
std
::
vector
<
float
*>
mem_src_addr_list
,
size_t
start
,
void
memcpy_task
(
std
::
vector
<
float
*>
*
mem_dest_addr_list
,
std
::
vector
<
float
*>
*
mem_src_addr_list
,
size_t
start
,
size_t
end
,
size_t
lens
)
{
for
(
size_t
i
=
start
;
i
<
end
;
i
++
)
{
auto
ret
=
memcpy_s
(
mem_dest_addr_list
[
i
],
lens
,
mem_src_addr_list
[
i
],
lens
);
auto
ret
=
memcpy_s
(
(
*
mem_dest_addr_list
)[
i
],
lens
,
(
*
mem_src_addr_list
)
[
i
],
lens
);
if
(
ret
!=
EOK
)
{
MS_LOG
(
EXCEPTION
)
<<
"memery copy failed."
;
}
...
...
@@ -204,7 +208,7 @@ void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector<kernel::AddressPtr>
break
;
}
auto
end
=
(
start
+
ones_copy_lens
)
>
memcpy_lens
?
memcpy_lens
:
start
+
ones_copy_lens
;
threads
[
i
]
=
std
::
thread
(
memcpy_task
,
mem_dest_addr_list
,
mem_src_addr_list
,
start
,
end
,
lens
);
threads
[
i
]
=
std
::
thread
(
memcpy_task
,
&
mem_dest_addr_list
,
&
mem_src_addr_list
,
start
,
end
,
lens
);
start
=
start
+
ones_copy_lens
;
}
for
(
size_t
j
=
0
;
j
<
i
;
j
++
)
{
...
...
mindspore/ccsrc/kernel/cpu/sub
scalar
_cpu_kernel.cc
→
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc
浏览文件 @
bb2777a1
...
...
@@ -14,14 +14,20 @@
* limitations under the License.
*/
#include <thread>
#include "kernel/cpu/sub
scalar
_cpu_kernel.h"
#include "kernel/cpu/sub_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
namespace
mindspore
{
namespace
kernel
{
void
SubscalarCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
offset_
=
AnfAlgo
::
GetNodeAttr
<
int
>
(
kernel_node
,
"input_y"
);
MS_LOG
(
DEBUG
)
<<
"offset: "
<<
offset_
;
void
SubCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
auto
shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
1
);
if
(
shape
.
size
()
==
1
)
{
if
(
shape
[
0
]
!=
1
)
{
MS_LOG
(
EXCEPTION
)
<<
"input 1 only support scalar"
;
}
}
else
{
MS_LOG
(
EXCEPTION
)
<<
"input 1 only support scalar"
;
}
}
void
sub_task
(
int
*
in_addr
,
int
*
out_addr
,
size_t
lens
,
int
offset
)
{
...
...
@@ -30,9 +36,9 @@ void sub_task(int *in_addr, int *out_addr, size_t lens, int offset) {
}
}
bool
Sub
scalar
CPUKernel
::
Launch
(
const
std
::
vector
<
kernel
::
AddressPtr
>
&
inputs
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
/*workspace*/
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
bool
SubCPUKernel
::
Launch
(
const
std
::
vector
<
kernel
::
AddressPtr
>
&
inputs
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
/*workspace*/
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
#if defined(_WIN32) || defined(_WIN64)
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
#else
...
...
@@ -41,6 +47,8 @@ bool SubscalarCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
#endif
auto
input_addr
=
reinterpret_cast
<
int
*>
(
inputs
[
0
]
->
addr
);
auto
output_addr
=
reinterpret_cast
<
int
*>
(
outputs
[
0
]
->
addr
);
offset_
=
*
reinterpret_cast
<
int
*>
(
inputs
[
1
]
->
addr
);
MS_LOG
(
INFO
)
<<
"offset: "
<<
offset_
;
auto
lens
=
inputs
[
0
]
->
size
/
sizeof
(
int
);
if
(
lens
<
10000
)
{
for
(
size_t
i
=
0
;
i
<
lens
;
i
++
)
{
...
...
@@ -73,7 +81,7 @@ bool SubscalarCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
(
void
)
gettimeofday
(
&
end_time
,
nullptr
);
uint64_t
time
=
1000000
*
static_cast
<
uint64_t
>
(
end_time
.
tv_sec
-
start_time
.
tv_sec
);
time
+=
static_cast
<
uint64_t
>
(
end_time
.
tv_usec
-
start_time
.
tv_usec
);
MS_LOG
(
INFO
)
<<
"Sub
scalar
CPUKernel, used time: "
<<
time
<<
" us"
;
MS_LOG
(
INFO
)
<<
"SubCPUKernel, used time: "
<<
time
<<
" us"
;
#endif
return
true
;
}
...
...
mindspore/ccsrc/kernel/cpu/sub
scalar
_cpu_kernel.h
→
mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h
浏览文件 @
bb2777a1
...
...
@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SUB
SCALAR
_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_SUB
SCALAR
_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
...
...
@@ -22,10 +22,10 @@
namespace
mindspore
{
namespace
kernel
{
class
Sub
scalar
CPUKernel
:
public
CPUKernel
{
class
SubCPUKernel
:
public
CPUKernel
{
public:
Sub
scalar
CPUKernel
()
:
offset_
(
0
)
{}
~
Sub
scalar
CPUKernel
()
override
=
default
;
SubCPUKernel
()
:
offset_
(
0
)
{}
~
SubCPUKernel
()
override
=
default
;
void
InitKernel
(
const
CNodePtr
&
kernel_node
)
override
;
...
...
@@ -36,9 +36,8 @@ class SubscalarCPUKernel : public CPUKernel {
int
offset_
;
};
MS_REG_CPU_KERNEL
(
Subscalar
,
KernelAttr
().
AddInputAttr
(
kNumberTypeInt32
).
AddOutputAttr
(
kNumberTypeInt32
),
SubscalarCPUKernel
);
MS_REG_CPU_KERNEL
(
Sub
,
KernelAttr
().
AddInputAttr
(
kNumberTypeInt32
).
AddOutputAttr
(
kNumberTypeInt32
),
SubCPUKernel
);
}
// namespace kernel
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_SUB
SCALAR
_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录