Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
41d195b5
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
41d195b5
编写于
6月 05, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 05, 2020
浏览文件
操作
浏览文件
下载
差异文件
!1794 support cpu op EmbeddingLookup
Merge pull request !1794 from dengwentao/cpu_op
上级
10ebd81b
098eec46
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
562 addition
and
0 deletion
+562
-0
mindspore/ccsrc/kernel/CMakeLists.txt
mindspore/ccsrc/kernel/CMakeLists.txt
+3
-0
mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc
...csrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc
+77
-0
mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h
...ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h
+46
-0
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
+228
-0
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h
+74
-0
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.cc
+81
-0
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.h
+44
-0
mindspore/ccsrc/operator/ops.cc
mindspore/ccsrc/operator/ops.cc
+3
-0
mindspore/ccsrc/operator/ops.h
mindspore/ccsrc/operator/ops.h
+3
-0
mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
...e/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
+3
-0
未找到文件。
mindspore/ccsrc/kernel/CMakeLists.txt
浏览文件 @
41d195b5
...
...
@@ -25,6 +25,9 @@ if (ENABLE_CPU)
# When MPI is not enabled, drop these kernel sources from CPU_SRC_LIST.
if (NOT ENABLE_MPI)
    list(REMOVE_ITEM CPU_SRC_LIST
        "cpu/allgather_cpu_kernel.cc"
        "cpu/reduce_scatter_cpu_kernel.cc"
        "cpu/embedding_look_up_comm_grad_cpu_kernel.cc"
        "cpu/embedding_look_up_cpu_kernel.cc"
        "cpu/subscalar_cpu_kernel.cc")
endif ()
endif
()
...
...
mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
namespace
mindspore
{
namespace
kernel
{
// Validates the node and caches the "split_num" attribute.
//
// Raises through MS_LOG(EXCEPTION) when split_num is not positive, when the
// first input has an empty shape, or when shape[0] is not a multiple of
// split_num. CheckParam is called first for its own argument checks.
void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
  MS_LOG(INFO) << "split_num: " << split_num_;
  // split_num_ is used as a divisor here and in Launch: zero would divide by
  // zero, and a negative value would be converted to a huge unsigned number.
  if (split_num_ <= 0) {
    MS_LOG(EXCEPTION) << "split_num is " << split_num_ << ", but it must be greater than 0.";
  }
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  // Guard the input_shape[0] access below.
  if (input_shape.empty()) {
    MS_LOG(EXCEPTION) << "Input shape is empty, but it must have at least one dimension.";
  }
  if (input_shape[0] % static_cast<size_t>(split_num_) != 0) {
    MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num.";
  }
}
// Zeroes the output buffer, then issues one MPIAdapter::AllGather call per
// split, walking input and output in equally sized float slices.
//
// inputs[0] / outputs[0] are treated as float buffers; their `size` fields are
// used as byte counts. The elapsed wall time is logged at INFO level.
// Always returns true.
bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                              const std::vector<kernel::AddressPtr> & /*workspace*/,
                                              const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
  auto start_time = std::chrono::steady_clock::now();
#else
  struct timeval start_time, end_time;
  (void)gettimeofday(&start_time, nullptr);
#endif
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  size_t input_size = inputs[0]->size;
  size_t output_size = outputs[0]->size;
  MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size;
  MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size;
  memset_s(output_addr, output_size, 0, output_size);
  // NOTE(review): the communication group is hard-coded to ranks 0..7 - confirm this matches the deployment.
  const std::vector<int> rank_group = {0, 1, 2, 3, 4, 5, 6, 7};
  // Per-split element counts. The buffers are addressed as float*, so the
  // element size must be sizeof(float); float_t may be a wider type on some
  // platforms and would then yield wrong slice lengths.
  size_t input_split_lens = input_size / split_num_ / sizeof(float);
  size_t output_split_lens = output_size / split_num_ / sizeof(float);
  for (int i = 0; i < split_num_; i++) {
    device::cpu::MPIAdapter::Instance().AllGather(input_addr + i * input_split_lens,
                                                  output_addr + i * output_split_lens, rank_group, input_split_lens);
  }
#if defined(_WIN32) || defined(_WIN64)
  auto end_time = std::chrono::steady_clock::now();
  std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us";
#else
  (void)gettimeofday(&end_time, nullptr);
  uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
  time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us";
#endif
  return true;
}
// Verifies that the node has exactly one input tensor; otherwise reports the
// actual count through MS_LOG(EXCEPTION).
void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  const size_t arg_count = AnfAlgo::GetInputTensorNum(kernel_node);
  if (arg_count == 1) {
    return;
  }
  MS_LOG(EXCEPTION) << "Argument number is " << arg_count << ", but EmbeddingLookUpCommGradCPUKernel needs 1.";
}
}
// namespace kernel
}
// namespace mindspore
mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
namespace
mindspore
{
namespace
kernel
{
// CPUKernel subclass declaring the InitKernel/Launch overrides for the
// embedding-lookup communication gradient. Holds the number of splits, which
// starts at 1 until InitKernel is run.
class EmbeddingLookUpCommGradCPUKernel : public CPUKernel {
 public:
  EmbeddingLookUpCommGradCPUKernel() = default;
  ~EmbeddingLookUpCommGradCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Checks the node's argument count before initialization.
  void CheckParam(const CNodePtr &kernel_node);

  int split_num_{1};
};
// Registers the kernel under the op name EmbeddingLookupCommGrad: float32 input, float32 output.
MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad,
                  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  EmbeddingLookUpCommGradCPUKernel);
}
// namespace kernel
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include <string>
#include "kernel/cpu/embedding_look_up_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
namespace
mindspore
{
namespace
kernel
{
// Caches shapes, element counts and attributes of the node for later Launch calls.
//
// Reads the attributes "axis", "reduce_scatter_flag", "offset" and, only when
// reduce_scatter_flag is set, "split_num". A negative axis is normalized
// against the input rank; axis_ is then stored relative to the shape padded to
// 4 dimensions (input and output shapes are expanded with ExpandDimsTo4 at the
// end). With reduce_scatter_flag set, a zero-filled intermediate buffer for
// the gather result is allocated; allocation failure raises via MS_LOG(EXCEPTION).
void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  input_lens_ = 1;
  for (auto shape : input_shape_) {
    MS_LOG(DEBUG) << "input shape: " << shape;
    input_lens_ = input_lens_ * shape;
  }
  MS_LOG(DEBUG) << "input lens: " << input_lens_;
  indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  indices_lens_ = 1;
  for (auto shape : indices_shape_) {
    MS_LOG(DEBUG) << "indice shape: " << shape;
    indices_lens_ = indices_lens_ * shape;
  }
  MS_LOG(DEBUG) << "indice lens: " << indices_lens_;
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  for (auto shape : output_shape_) {
    MS_LOG(DEBUG) << "output shape: " << shape;
  }
  auto output_type = AnfAlgo::GetOutputInferDataType(kernel_node, 0);
  MS_LOG(DEBUG) << "output type: " << output_type;

  int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, "axis");
  MS_LOG(DEBUG) << "axis: " << axis;
  // Normalize the freshly read attribute. Testing the member axis_ here would
  // look at a stale value and leave a negative axis unnormalized.
  if (axis < 0) {
    axis = axis + SizeToInt(input_shape_.size());
  }
  // Shift the axis to its position in the 4-d padded shape.
  axis_ = 4 - SizeToInt(input_shape_.size()) + axis;
  MS_LOG(DEBUG) << "axis_: " << axis_;

  reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "reduce_scatter_flag");
  MS_LOG(DEBUG) << "reduce_scatter_flag: " << reduce_scatter_flag_;
  if (reduce_scatter_flag_) {
    // The gather result has the input shape with the axis dimension replaced
    // by the whole indices shape.
    size_t gatherv2_out_lens = 1;
    for (int i = 0; i < SizeToInt(input_shape_.size()); i++) {
      if (i == axis) {
        for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) {
          MS_LOG(DEBUG) << "gatherv2 out shape: " << indices_shape_[j];
          gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j];
        }
      } else {
        MS_LOG(DEBUG) << "gatherv2 out shape: " << input_shape_[i];
        gatherv2_out_lens = gatherv2_out_lens * input_shape_[i];
      }
    }
    gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float);
    MS_LOG(DEBUG) << "gatherv2 out lens: " << gatherv2_out_lens_;
    // Release a buffer left over from an earlier InitKernel call so it is not leaked.
    if (gather_v2_out_ != nullptr) {
      free(gather_v2_out_);
      gather_v2_out_ = nullptr;
    }
    gather_v2_out_ = malloc(gatherv2_out_lens_);
    if (gather_v2_out_ == nullptr) {
      MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_;
    }
    memset_s(gather_v2_out_, gatherv2_out_lens_, 0, gatherv2_out_lens_);

    split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
    MS_LOG(DEBUG) << "split_num: " << split_num_;
  }

  offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "offset");
  MS_LOG(DEBUG) << "offset: " << offset_;
  CPUKernelUtils::ExpandDimsTo4(&input_shape_);
  CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}
// Gathers rows of inputs[0] selected by inputs[1] along axis_ and, when
// reduce_scatter_flag_ is set, reduce-scatters ("sum") the intermediate result
// into outputs[0] one split at a time.
//
// The gather destination is either outputs[0] directly or the intermediate
// buffer gather_v2_out_. It is cleared on every call because LookUpTable
// leaves the destination untouched for indices it skips; without clearing,
// those rows would keep data from a previous launch (or be uninitialized).
// The elapsed wall time is logged at INFO level. Always returns true.
bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> & /*workspace*/,
                                      const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
  auto start_time = std::chrono::steady_clock::now();
#else
  struct timeval start_time, end_time;
  (void)gettimeofday(&start_time, nullptr);
#endif
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << outputs[0]->size;
  float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast<float *>(gather_v2_out_) : output_addr;
  MS_LOG(DEBUG) << "gatherv2 out addr: " << gather_out_addr;

  // Clear the gather destination before LookUpTable advances the pointer.
  const size_t gather_out_bytes = reduce_scatter_flag_ ? gatherv2_out_lens_ : outputs[0]->size;
  const size_t gather_out_elems = gather_out_bytes / sizeof(float);
  for (size_t n = 0; n < gather_out_elems; ++n) {
    gather_out_addr[n] = 0.0f;
  }

  size_t dim0 = input_shape_[0];
  size_t dim1 = input_shape_[1];
  size_t dim2 = input_shape_[2];
  // One LookUpTable call per combination of the dimensions in front of axis_.
  if (axis_ == 3) {
    for (size_t i = 0; i < dim0; ++i) {
      for (size_t j = 0; j < dim1; ++j) {
        for (size_t k = 0; k < dim2; ++k) {
          LookUpTable(inputs, i, j, k, &gather_out_addr);
        }
      }
    }
  } else if (axis_ == 2) {
    for (size_t i = 0; i < dim0; ++i) {
      for (size_t j = 0; j < dim1; ++j) {
        LookUpTable(inputs, i, j, 0, &gather_out_addr);
      }
    }
  } else if (axis_ == 1) {
    for (size_t i = 0; i < dim0; ++i) {
      LookUpTable(inputs, i, 0, 0, &gather_out_addr);
    }
  } else if (axis_ == 0) {
    LookUpTable(inputs, 0, 0, 0, &gather_out_addr);
  }

  if (reduce_scatter_flag_) {
    // split_num_ is a divisor below; zero or negative would be undefined or nonsensical.
    if (split_num_ <= 0) {
      MS_LOG(EXCEPTION) << "split_num is " << split_num_ << ", but it must be greater than 0.";
    }
    // NOTE(review): the communication group is hard-coded to ranks 0..7 - confirm this matches the deployment.
    const std::vector<int> group = {0, 1, 2, 3, 4, 5, 6, 7};
    size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float);
    // Each rank receives an equal share of every split.
    size_t reduce_scatter_out_lens = one_split_lens / group.size();
    for (int i = 0; i < split_num_; i++) {
      device::cpu::MPIAdapter::Instance().ReduceScatter(reinterpret_cast<float *>(gather_v2_out_) + i * one_split_lens,
                                                        output_addr + i * reduce_scatter_out_lens, group,
                                                        one_split_lens, "sum");
    }
  }
#if defined(_WIN32) || defined(_WIN64)
  auto end_time = std::chrono::steady_clock::now();
  std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;
  MS_LOG(INFO) << "EmbeddingLookUpCPUKernel, used time: " << cost.count() << " us";
#else
  (void)gettimeofday(&end_time, nullptr);
  uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
  time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
  MS_LOG(INFO) << "EmbeddingLookUpCPUKernel, used time: " << time << " us";
#endif
  return true;
}
// Worker routine: for every position in [start, end) copies `lens` bytes from
// the source address to the destination address at that position, using
// memcpy_s. A failed copy is reported through MS_LOG(EXCEPTION).
void memcpy_task(std::vector<float *> mem_dest_addr_list, std::vector<float *> mem_src_addr_list, size_t start,
                 size_t end, size_t lens) {
  size_t idx = start;
  while (idx < end) {
    const auto status = memcpy_s(mem_dest_addr_list[idx], lens, mem_src_addr_list[idx], lens);
    if (status != EOK) {
      MS_LOG(EXCEPTION) << "memery copy failed.";
    }
    ++idx;
  }
}
// Copies, for one fixed combination of leading coordinates (dim0, dim1, dim2),
// the slice selected by every index in inputs[1] from inputs[0] to *output_addr.
//
// Each index is shifted by offset_ first. Indices that are negative after the
// shift, not smaller than input_shape_[axis_], or whose slice would run past
// input_lens_ are skipped: nothing is written for them, but *output_addr is
// still advanced by one slice (`num` floats) per index. The collected copies
// are then executed by up to 8 worker threads running memcpy_task.
void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1,
                                           size_t dim2, float **output_addr) {
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
  size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_);
  size_t lens = num * sizeof(float);
  std::vector<float *> mem_dest_addr_list;
  std::vector<float *> mem_src_addr_list;
  // At most one copy per index, so a single allocation suffices.
  mem_dest_addr_list.reserve(indices_lens_);
  mem_src_addr_list.reserve(indices_lens_);
  for (size_t i = 0; i < indices_lens_; ++i) {
    int indices = indices_addr[i] - offset_;
    if (indices >= 0) {
      size_t index = IntToSize(indices);
      if (index < input_shape_[axis_]) {
        size_t pos = 0;
        // The looked-up index takes the place of the coordinate on axis_.
        switch (axis_) {
          case 3:
            pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, dim2, index);
            break;
          case 2:
            pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, index, 0);
            break;
          case 1:
            pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, index, 0, 0);
            break;
          case 0:
            pos = CPUKernelUtils::CalcOffset(input_shape_, index, 0, 0, 0);
            break;
          default:
            break;
        }
        if (pos + num <= input_lens_) {
          mem_dest_addr_list.push_back(*output_addr);
          mem_src_addr_list.push_back(input_addr + pos);
        }
      }
    }
    *output_addr += num;
  }

  const size_t thread_num = 8;
  std::thread threads[8];
  size_t memcpy_lens = mem_dest_addr_list.size();
  size_t start = 0;
  size_t ones_copy_lens = (memcpy_lens + thread_num - 1) / thread_num;
  size_t i;
  for (i = 0; i < thread_num; i++) {
    // Stop as soon as all copies are assigned. Using ">=" (not ">") avoids
    // starting threads with an empty range, including the case of no copies at all.
    if (start >= memcpy_lens) {
      break;
    }
    auto end = (start + ones_copy_lens) > memcpy_lens ? memcpy_lens : start + ones_copy_lens;
    threads[i] = std::thread(memcpy_task, mem_dest_addr_list, mem_src_addr_list, start, end, lens);
    start = start + ones_copy_lens;
  }
  // Only the first i entries hold running threads.
  for (size_t j = 0; j < i; j++) {
    threads[j].join();
  }
}
// Rejects nodes whose first input has more than 4 dimensions or that do not
// have exactly 2 input tensors; both failures go through MS_LOG(EXCEPTION).
void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  const auto table_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (!(table_shape.size() <= 4)) {
    MS_LOG(EXCEPTION) << "Input dims is " << table_shape.size()
                      << ", but EmbeddingLookUpCPUKernel olny support 4d or lower.";
  }

  const size_t arg_count = AnfAlgo::GetInputTensorNum(kernel_node);
  if (arg_count == 2) {
    return;
  }
  MS_LOG(EXCEPTION) << "Argument number is " << arg_count << ", but EmbeddingLookUpCPUKernel needs 2.";
}
}
// namespace kernel
}
// namespace mindspore
mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
namespace
mindspore
{
namespace
kernel
{
// CPUKernel subclass declaring the embedding lookup kernel. Keeps the cached
// shapes, attributes and an optional intermediate buffer (gather_v2_out_)
// which is released with free() in the destructor.
// NOTE(review): the class owns a raw buffer but does not disable copying -
// confirm instances are never copied.
class EmbeddingLookUpCPUKernel : public CPUKernel {
 public:
  EmbeddingLookUpCPUKernel()
      : axis_(0),
        offset_(0),
        split_num_(0),
        input_lens_(0),
        indices_lens_(0),
        gatherv2_out_lens_(0),
        reduce_scatter_flag_(false),
        gather_v2_out_(nullptr) {}

  ~EmbeddingLookUpCPUKernel() override {
    if (gather_v2_out_ == nullptr) {
      return;
    }
    free(gather_v2_out_);
    gather_v2_out_ = nullptr;
  }

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Gathers the slices for one combination of leading coordinates and
  // advances *output_addr past what it produced.
  void LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
                   float **output_addr);
  // Validates input rank and argument count of the node.
  void CheckParam(const CNodePtr &kernel_node);

  std::vector<size_t> input_shape_;
  std::vector<size_t> indices_shape_;
  std::vector<size_t> output_shape_;
  int axis_;
  int offset_;
  int split_num_;
  size_t input_lens_;
  size_t indices_lens_;
  size_t gatherv2_out_lens_;
  bool reduce_scatter_flag_;
  void *gather_v2_out_;
};
// Registers the kernel under the op name EmbeddingLookup: float32 and int32 inputs, float32 output.
MS_REG_CPU_KERNEL(
  EmbeddingLookup,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
  EmbeddingLookUpCPUKernel);
}
// namespace kernel
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.cc
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include "kernel/cpu/subscalar_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
namespace
mindspore
{
namespace
kernel
{
// Stores the node attribute "input_y" as the value that Launch subtracts from
// every input element, and logs it at DEBUG level.
void SubscalarCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  const int subtrahend = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_y");
  offset_ = subtrahend;
  MS_LOG(DEBUG) << "offset: " << offset_;
}
// Writes in_addr[i] - offset to out_addr[i] for the first `lens` elements.
void sub_task(int *in_addr, int *out_addr, size_t lens, int offset) {
  const int *src = in_addr;
  int *dst = out_addr;
  size_t remaining = lens;
  while (remaining != 0) {
    *dst = *src - offset;
    ++src;
    ++dst;
    --remaining;
  }
}
// Computes outputs[0][i] = inputs[0][i] - offset_ for every int element of inputs[0].
//
// Fewer than 10000 elements are processed inline; larger inputs are split into
// contiguous chunks handled by up to 4 threads running sub_task. The elapsed
// wall time is logged at INFO level. Always returns true.
bool SubscalarCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> & /*workspace*/,
                                const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
  auto start_time = std::chrono::steady_clock::now();
#else
  struct timeval start_time, end_time;
  (void)gettimeofday(&start_time, nullptr);
#endif
  auto input_addr = reinterpret_cast<int *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<int *>(outputs[0]->addr);
  auto lens = inputs[0]->size / sizeof(int);
  if (lens < 10000) {
    for (size_t i = 0; i < lens; i++) {
      output_addr[i] = input_addr[i] - offset_;
    }
  } else {
    constexpr size_t kThreadNum = 4;
    std::thread threads[kThreadNum];
    // Ceiling division: every chunk but the last has this many elements.
    const size_t chunk_lens = (lens + kThreadNum - 1) / kThreadNum;
    size_t launched = 0;
    for (size_t begin = 0; launched < kThreadNum && begin < lens; begin += chunk_lens) {
      // Clamp BEFORE starting the worker so the final chunk never reaches
      // past the end of the buffers.
      const size_t remaining = lens - begin;
      const size_t count = remaining < chunk_lens ? remaining : chunk_lens;
      threads[launched] = std::thread(sub_task, input_addr + begin, output_addr + begin, count, offset_);
      ++launched;
    }
    // Join only the threads that were actually started.
    for (size_t i = 0; i < launched; i++) {
      threads[i].join();
    }
  }
#if defined(_WIN32) || defined(_WIN64)
  auto end_time = std::chrono::steady_clock::now();
  std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;
  MS_LOG(INFO) << "SubscalarCPUKernel, used time: " << cost.count() << " us";
#else
  (void)gettimeofday(&end_time, nullptr);
  uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
  time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
  MS_LOG(INFO) << "SubscalarCPUKernel, used time: " << time << " us";
#endif
  return true;
}
}
// namespace kernel
}
// namespace mindspore
mindspore/ccsrc/kernel/cpu/subscalar_cpu_kernel.h
0 → 100644
浏览文件 @
41d195b5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SUBSCALAR_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_SUBSCALAR_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
namespace
mindspore
{
namespace
kernel
{
// CPUKernel subclass declaring the InitKernel/Launch overrides for the
// Subscalar kernel. Holds the integer offset, which is 0 until InitKernel runs.
class SubscalarCPUKernel : public CPUKernel {
 public:
  SubscalarCPUKernel() = default;
  ~SubscalarCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  int offset_{0};
};
// Registers the kernel under the op name Subscalar: int32 input, int32 output.
MS_REG_CPU_KERNEL(Subscalar, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                  SubscalarCPUKernel);
}
// namespace kernel
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_SUBSCALAR_CPU_KERNEL_H_
mindspore/ccsrc/operator/ops.cc
浏览文件 @
41d195b5
...
...
@@ -133,6 +133,8 @@ const PrimitivePtr kPrimConcat = std::make_shared<Primitive>("Concat");
const PrimitivePtr kPrimSqueeze = std::make_shared<Primitive>("Squeeze");
const PrimitivePtr kPrimTranspose = std::make_shared<Primitive>("Transpose");
const PrimitivePtr kPrimGatherV2 = std::make_shared<Primitive>("GatherV2");
const PrimitivePtr kPrimEmbeddingLookup = std::make_shared<Primitive>("EmbeddingLookup");
const PrimitivePtr kPrimEmbeddingLookupCommGrad = std::make_shared<Primitive>("EmbeddingLookupCommGrad");
const PrimitivePtr kPrimSize = std::make_shared<Primitive>("Size");
const PrimitivePtr kPrimArgMax = std::make_shared<Primitive>("Argmax");
const PrimitivePtr kPrimPack = std::make_shared<Primitive>("Pack");
...
...
@@ -168,6 +170,7 @@ const PrimitivePtr kPrimLess = std::make_shared<Primitive>("Less");
const PrimitivePtr kPrimLessEqual = std::make_shared<Primitive>("LessEqual");
const PrimitivePtr kPrimCumSum = std::make_shared<Primitive>("CumSum");
const PrimitivePtr kPrimCumProd = std::make_shared<Primitive>("CumProd");
const PrimitivePtr kPrimSubscalar = std::make_shared<Primitive>("Subscalar");

// NN
const PrimitivePtr kPrimFlatten = std::make_shared<Primitive>("Flatten");
...
...
mindspore/ccsrc/operator/ops.h
浏览文件 @
41d195b5
...
...
@@ -140,6 +140,8 @@ extern const PrimitivePtr kPrimConcat;
extern const PrimitivePtr kPrimSqueeze;
extern const PrimitivePtr kPrimTranspose;
extern const PrimitivePtr kPrimGatherV2;
extern const PrimitivePtr kPrimEmbeddingLookup;
extern const PrimitivePtr kPrimEmbeddingLookupCommGrad;
extern const PrimitivePtr kPrimSize;
extern const PrimitivePtr kPrimArgMax;
extern const PrimitivePtr kPrimPack;
...
...
@@ -176,6 +178,7 @@ extern const PrimitivePtr kPrimLess;
extern const PrimitivePtr kPrimLessEqual;
extern const PrimitivePtr kPrimCumSum;
extern const PrimitivePtr kPrimCumProd;
extern const PrimitivePtr kPrimSubscalar;

// NN
extern const PrimitivePtr kPrimFlatten;
...
...
mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
浏览文件 @
41d195b5
...
...
@@ -36,6 +36,9 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() {
Register
(
prim
::
kPrimReduceSum
->
name
(),
{
1
});
Register
(
prim
::
kPrimReduceMean
->
name
(),
{
1
});
Register
(
prim
::
kPrimGatherV2
->
name
(),
{
2
});
Register
(
prim
::
kPrimEmbeddingLookup
->
name
(),
{
2
,
3
,
4
,
5
});
Register
(
prim
::
kPrimEmbeddingLookupCommGrad
->
name
(),
{
1
});
Register
(
prim
::
kPrimSubscalar
->
name
(),
{
1
});
Register
(
prim
::
kPrimTranspose
->
name
(),
{
1
});
Register
(
prim
::
kPrimUnsortedSegmentSum
->
name
(),
{
2
});
Register
(
prim
::
kPrimOneHot
->
name
(),
{
1
});
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录