Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
cc5a2408
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cc5a2408
编写于
6月 23, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 23, 2020
浏览文件
操作
浏览文件
下载
差异文件
!2491 add cpu kernel profiling log
Merge pull request !2491 from kisnwang/add-cpu-kernel-profiling
上级
88709569
dc29cfcb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
18 addition
and
13 deletion
+18
-13
mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc
mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc
+8
-0
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
+8
-11
mindspore/ccsrc/session/gpu_session.cc
mindspore/ccsrc/session/gpu_session.cc
+2
-2
未找到文件。
mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc
浏览文件 @
cc5a2408
...
...
@@ -26,6 +26,7 @@
#include "device/cpu/cpu_device_address.h"
#include "utils/context/ms_context.h"
#include "utils/config_manager.h"
#include "utils/profile.h"
#include "common/utils.h"
#include "session/anf_runtime_algorithm.h"
#include "session/session_basic.h"
...
...
@@ -270,6 +271,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph) {
auto
kernels
=
kernel_graph
->
execution_order
();
for
(
const
auto
&
kernel
:
kernels
)
{
#ifdef ENABLE_PROFILE
double
start_time
=
GetTime
();
#endif
std
::
vector
<
kernel
::
AddressPtr
>
kernel_inputs
;
std
::
vector
<
kernel
::
AddressPtr
>
kernel_workspaces
;
std
::
vector
<
kernel
::
AddressPtr
>
kernel_outputs
;
...
...
@@ -297,6 +301,10 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph) {
if
(
!
ret
)
{
MS_LOG
(
EXCEPTION
)
<<
"Launch kernel failed."
;
}
#ifdef ENABLE_PROFILE
double
cost_time
=
GetTime
()
-
start_time
;
MS_LOG
(
INFO
)
<<
"cpu kernel: "
<<
kernel
->
fullname_with_scope
()
<<
" costs "
<<
cost_time
*
1e6
<<
" us"
;
#endif
}
return
true
;
}
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
浏览文件 @
cc5a2408
...
...
@@ -29,7 +29,7 @@ void ComputeFtrl(MultiThreadComputeParams *input_params, size_t start, size_t en
auto
linear
=
input_params
->
linear_
;
auto
lr
=
input_params
->
lr_
;
auto
l1
=
input_params
->
l1_
;
auto
l2
=
input_params
->
l2_
;
auto
l2
_plus
=
2
*
input_params
->
l2_
;
auto
lr_power
=
input_params
->
lr_power_
;
auto
unique_sparse_grad
=
input_params
->
sparse_grad_
;
auto
var_first_dim_size
=
input_params
->
var_first_dim_size_
;
...
...
@@ -44,21 +44,18 @@ void ComputeFtrl(MultiThreadComputeParams *input_params, size_t start, size_t en
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
auto
accum_new
=
accum
[
j
]
+
summed_grad
*
summed_grad
;
if
(
lr_power
==
-
0.5
)
{
linear
[
j
]
+=
summed_grad
-
(
std
::
sqrt
(
accum_new
)
-
std
::
sqrt
(
accum
[
j
]))
/
lr
*
var
[
j
];
}
else
{
linear
[
j
]
+=
summed_grad
-
(
std
::
pow
(
accum_new
,
-
lr_power
)
-
std
::
pow
(
accum
[
j
],
-
lr_power
))
/
lr
*
var
[
j
];
}
auto
x
=
Sign
(
linear
[
j
])
*
l1
-
linear
[
j
];
float
y
;
if
(
lr_power
==
-
0.5
)
{
y
=
std
::
sqrt
(
accum_new
)
/
lr
+
2
*
l2
;
y
=
std
::
sqrt
(
accum_new
);
linear
[
j
]
+=
summed_grad
-
(
y
-
std
::
sqrt
(
accum
[
j
]))
/
lr
*
var
[
j
];
}
else
{
y
=
std
::
pow
(
accum_new
,
-
lr_power
)
/
lr
+
2
*
l2
;
y
=
std
::
pow
(
accum_new
,
-
lr_power
);
linear
[
j
]
+=
summed_grad
-
(
y
-
std
::
pow
(
accum
[
j
],
-
lr_power
))
/
lr
*
var
[
j
];
}
auto
pre_shrink
=
x
/
y
;
var
[
j
]
=
std
::
fabs
(
linear
[
j
])
>
l1
?
pre_shrink
:
0
;
accum
[
j
]
=
accum_new
;
auto
x
=
Sign
(
linear
[
j
])
*
l1
-
linear
[
j
];
y
=
y
/
lr
+
l2_plus
;
var
[
j
]
=
std
::
fabs
(
linear
[
j
])
>
l1
?
x
/
y
:
0
;
}
}
}
...
...
mindspore/ccsrc/session/gpu_session.cc
浏览文件 @
cc5a2408
...
...
@@ -112,10 +112,10 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
auto
tensor_address
=
tensor
->
device_address
();
bool
need_sync
=
false
;
if
(
ms_context
->
enable_pynative_infer
())
{
if
(
tensor_address
.
get
()
==
nullptr
||
tensor_address
!=
device_address
)
{
if
(
tensor_address
==
nullptr
||
tensor_address
!=
device_address
)
{
need_sync
=
true
;
}
}
else
if
(
tensor
->
is_dirty
())
{
}
else
if
(
tensor
->
is_dirty
()
||
tensor_address
==
nullptr
)
{
need_sync
=
true
;
}
else
if
(
tensor_address
!=
device_address
)
{
if
(
tensor_address
->
DeviceType
()
==
device_address
->
DeviceType
())
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录