Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
12e7ddae
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
12e7ddae
编写于
6月 22, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 22, 2020
浏览文件
操作
浏览文件
下载
差异文件
!2386 Add multiple process for computation of optimizer in cpu
Merge pull request !2386 from YuJianfeng/master
上级
f96bf04b
6c1bc1c6
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
260 addition
and
103 deletion
+260
-103
mindspore/ccsrc/kernel/common_utils.cc
mindspore/ccsrc/kernel/common_utils.cc
+17
-0
mindspore/ccsrc/kernel/common_utils.h
mindspore/ccsrc/kernel/common_utils.h
+23
-0
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc
+75
-34
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h
+0
-3
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
+54
-27
mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc
...ore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc
+47
-18
mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc
...rc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc
+44
-21
未找到文件。
mindspore/ccsrc/kernel/common_utils.cc
浏览文件 @
12e7ddae
...
...
@@ -20,6 +20,7 @@
#include <iostream>
#include <utility>
#include <fstream>
#include <thread>
#include "nlohmann/json.hpp"
#include "session/anf_runtime_algorithm.h"
#include "common/utils.h"
...
...
@@ -876,5 +877,21 @@ bool IsWeightBoundary(const AnfNodePtr &node) {
}
return
false
;
}
void
MultiThreadCompute
(
const
MultiThreadComputeFunc
&
func
,
MultiThreadComputeParams
*
params
,
size_t
thread_num
,
size_t
total_compute_size
)
{
std
::
vector
<
std
::
thread
>
threads
;
threads
.
reserve
(
thread_num
);
size_t
start
=
0
;
size_t
once_compute_size
=
(
total_compute_size
+
thread_num
-
1
)
/
thread_num
;
while
(
start
<
total_compute_size
)
{
size_t
end
=
(
start
+
once_compute_size
)
>
total_compute_size
?
total_compute_size
:
(
start
+
once_compute_size
);
threads
.
emplace_back
(
std
::
thread
(
func
,
params
,
start
,
end
));
start
+=
once_compute_size
;
}
for
(
size_t
i
=
0
;
i
<
threads
.
size
();
++
i
)
{
threads
[
i
].
join
();
}
}
}
// namespace kernel
}
// namespace mindspore
mindspore/ccsrc/kernel/common_utils.h
浏览文件 @
12e7ddae
...
...
@@ -78,6 +78,27 @@ struct SparseGradient {
size_t
indices_size_
;
};
struct
MultiThreadComputeParams
{
float
*
var_
;
float
*
accum_
;
float
*
linear_
;
float
*
m_
;
float
*
m_t_
;
float
*
v_
;
float
lr_
;
float
l1_
;
float
l2_
;
float
lr_power_
;
float
beta1_
;
float
beta2_
;
float
epsilon_
;
SparseGradient
sparse_grad_
;
size_t
var_first_dim_size_
;
size_t
var_outer_dim_size_
;
bool
use_nesterov_
;
};
using
MultiThreadComputeFunc
=
std
::
function
<
void
(
MultiThreadComputeParams
*
param
,
size_t
start
,
size_t
end
)
>
;
bool
CheckCache
(
const
std
::
string
&
kernel_name
);
KernelPackPtr
SearchCache
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
processor
);
KernelPackPtr
InsertCache
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
processor
);
...
...
@@ -107,6 +128,8 @@ void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr>
bool
GetInputTensorValue
(
const
AnfNodePtr
&
anf_node
,
size_t
input_idx
,
nlohmann
::
json
*
const
node_json
);
void
GetGraphRealOutput
(
const
FuncGraphPtr
&
func_graph
,
std
::
vector
<
std
::
pair
<
AnfNodePtr
,
size_t
>>
*
node_list
);
bool
IsWeightBoundary
(
const
AnfNodePtr
&
node
);
void
MultiThreadCompute
(
const
MultiThreadComputeFunc
&
func
,
MultiThreadComputeParams
*
params
,
size_t
thread_num
,
size_t
total_compute_size
);
}
// namespace kernel
}
// namespace mindspore
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc
浏览文件 @
12e7ddae
...
...
@@ -14,12 +14,66 @@
* limitations under the License.
*/
#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h"
#include "kernel/common_utils.h"
#include "device/cpu/cpu_device_address.h"
namespace
mindspore
{
namespace
kernel
{
namespace
{
constexpr
size_t
kSparseApplyAdamInputSize
=
11
;
void
ComputeAdam
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
m
=
input_params
->
m_
;
auto
m_t
=
input_params
->
m_t_
;
auto
v
=
input_params
->
v_
;
auto
beta1
=
input_params
->
beta1_
;
auto
beta2
=
input_params
->
beta2_
;
auto
use_nesterov
=
input_params
->
use_nesterov_
;
auto
unique_sparse_grad
=
input_params
->
sparse_grad_
;
auto
var_first_dim_size
=
input_params
->
var_first_dim_size_
;
auto
var_outer_dim_size
=
input_params
->
var_outer_dim_size_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
m
[
j
]
+=
(
1
-
beta1
)
*
summed_grad
;
v
[
j
]
+=
(
1
-
beta2
)
*
summed_grad
*
summed_grad
;
if
(
use_nesterov
)
{
m_t
[
j
]
=
m
[
j
]
*
beta1
+
(
1
-
beta1
)
*
summed_grad
;
}
}
}
}
void
ComputeMomentum
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
m
=
input_params
->
m_
;
auto
v
=
input_params
->
v_
;
auto
beta1
=
input_params
->
beta1_
;
auto
beta2
=
input_params
->
beta2_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
m
[
i
]
*=
beta1
;
v
[
i
]
*=
beta2
;
}
}
void
ComputeWeight
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
var
=
input_params
->
var_
;
auto
m
=
input_params
->
m_
;
auto
v
=
input_params
->
v_
;
auto
lr
=
input_params
->
lr_
;
auto
epsilon
=
input_params
->
epsilon_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
var
[
i
]
-=
lr
*
m
[
i
]
/
(
std
::
sqrt
(
v
[
i
])
+
epsilon
);
}
}
}
// namespace
void
SparseApplyAdamCPUKernel
::
InitInputOutputSize
(
const
CNodePtr
&
kernel_node
)
{
...
...
@@ -64,29 +118,6 @@ void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
}
}
void
SparseApplyAdamCPUKernel
::
UpdateSparseMomentum
(
const
SparseGradient
&
unique_sparse_grad
,
float
*
m
,
float
*
m_t
,
float
*
v
,
float
beta1
,
float
beta2
)
const
{
MS_EXCEPTION_IF_NULL
(
m
);
MS_EXCEPTION_IF_NULL
(
m_t
);
MS_EXCEPTION_IF_NULL
(
v
);
for
(
size_t
i
=
0
;
i
<
unique_sparse_grad
.
indices_size_
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size_
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size_
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size_
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size_
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
m
[
j
]
+=
(
1
-
beta1
)
*
summed_grad
;
v
[
j
]
+=
(
1
-
beta2
)
*
summed_grad
*
summed_grad
;
if
(
use_nesterov_
)
{
m_t
[
j
]
=
m
[
j
]
*
beta1
+
(
1
-
beta1
)
*
summed_grad
;
}
}
}
}
bool
SparseApplyAdamCPUKernel
::
Launch
(
const
std
::
vector
<
kernel
::
AddressPtr
>
&
inputs
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
workspace
,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
/*outputs*/
)
{
...
...
@@ -115,21 +146,31 @@ bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
ReduceSparseGradient
(
SparseGradient
({
grad
,
indices
,
indices_size_
}),
&
unique_sparse_grad
,
var_first_dim_size_
,
var_outer_dim_size_
);
size_t
total_dim_size
=
var_first_dim_size_
*
var_outer_dim_size_
;
// Update momentum
lr
=
lr
*
std
::
sqrt
(
1
-
beta2_power
)
/
(
1
-
beta1_power
);
for
(
size_t
i
=
0
;
i
<
total_dim_size
;
++
i
)
{
m
[
i
]
*=
beta1
;
v
[
i
]
*=
beta2
;
}
MultiThreadComputeParams
input_params
;
input_params
.
m_
=
m
;
input_params
.
v_
=
v
;
input_params
.
beta1_
=
beta1
;
input_params
.
beta2_
=
beta2
;
const
size_t
kThreadNum
=
16
;
MultiThreadCompute
(
ComputeMomentum
,
&
input_params
,
kThreadNum
,
total_dim_size
);
std
::
vector
<
float
>
m_t
(
m
,
m
+
total_dim_size
);
UpdateSparseMomentum
(
unique_sparse_grad
,
m
,
m_t
.
data
(),
v
,
beta1
,
beta2
);
// Update weight
input_params
.
m_t_
=
m_t
.
data
();
input_params
.
use_nesterov_
=
use_nesterov_
;
input_params
.
sparse_grad_
=
unique_sparse_grad
;
input_params
.
var_first_dim_size_
=
var_first_dim_size_
;
input_params
.
var_outer_dim_size_
=
var_outer_dim_size_
;
MultiThreadCompute
(
ComputeAdam
,
&
input_params
,
kThreadNum
,
unique_sparse_grad
.
indices_size_
);
if
(
use_nesterov_
)
{
m
=
m_t
.
data
();
}
for
(
size_t
i
=
0
;
i
<
total_dim_size
;
++
i
)
{
var
[
i
]
-=
lr
*
m
[
i
]
/
(
std
::
sqrt
(
v
[
i
])
+
epsilon
);
input_params
.
m_
=
input_params
.
m_t_
;
}
input_params
.
var_
=
var
;
input_params
.
lr_
=
lr
;
input_params
.
epsilon_
=
epsilon
;
MultiThreadCompute
(
ComputeWeight
,
&
input_params
,
kThreadNum
,
total_dim_size
);
return
true
;
}
}
// namespace kernel
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h
浏览文件 @
12e7ddae
...
...
@@ -20,7 +20,6 @@
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "kernel/common_utils.h"
namespace
mindspore
{
namespace
kernel
{
...
...
@@ -35,8 +34,6 @@ class SparseApplyAdamCPUKernel : public CPUKernel {
const
std
::
vector
<
AddressPtr
>
&
outputs
)
override
;
private:
void
UpdateSparseMomentum
(
const
SparseGradient
&
unique_sparse_grad
,
float
*
m
,
float
*
m_t
,
float
*
v
,
float
beta1
,
float
beta2
)
const
;
size_t
indices_size_
{
0
};
size_t
var_first_dim_size_
{
0
};
size_t
var_outer_dim_size_
{
1
};
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc
浏览文件 @
12e7ddae
...
...
@@ -21,6 +21,47 @@ namespace mindspore {
namespace
kernel
{
namespace
{
constexpr
size_t
kSparseApplyFtrlInputSize
=
5
;
void
ComputeFtrl
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
var
=
input_params
->
var_
;
auto
accum
=
input_params
->
accum_
;
auto
linear
=
input_params
->
linear_
;
auto
lr
=
input_params
->
lr_
;
auto
l1
=
input_params
->
l1_
;
auto
l2
=
input_params
->
l2_
;
auto
lr_power
=
input_params
->
lr_power_
;
auto
unique_sparse_grad
=
input_params
->
sparse_grad_
;
auto
var_first_dim_size
=
input_params
->
var_first_dim_size_
;
auto
var_outer_dim_size
=
input_params
->
var_outer_dim_size_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
auto
accum_new
=
accum
[
j
]
+
summed_grad
*
summed_grad
;
if
(
lr_power
==
-
0.5
)
{
linear
[
j
]
+=
summed_grad
-
(
std
::
sqrt
(
accum_new
)
-
std
::
sqrt
(
accum
[
j
]))
/
lr
*
var
[
j
];
}
else
{
linear
[
j
]
+=
summed_grad
-
(
std
::
pow
(
accum_new
,
-
lr_power
)
-
std
::
pow
(
accum
[
j
],
-
lr_power
))
/
lr
*
var
[
j
];
}
auto
x
=
Sign
(
linear
[
j
])
*
l1
-
linear
[
j
];
float
y
;
if
(
lr_power
==
-
0.5
)
{
y
=
std
::
sqrt
(
accum_new
)
/
lr
+
2
*
l2
;
}
else
{
y
=
std
::
pow
(
accum_new
,
-
lr_power
)
/
lr
+
2
*
l2
;
}
auto
pre_shrink
=
x
/
y
;
var
[
j
]
=
std
::
fabs
(
linear
[
j
])
>
l1
?
pre_shrink
:
0
;
accum
[
j
]
=
accum_new
;
}
}
}
}
// namespace
void
SparseApplyFtrlCPUKernel
::
InitInputOutputSize
(
const
CNodePtr
&
kernel_node
)
{
...
...
@@ -96,33 +137,19 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
ReduceSparseGradient
(
SparseGradient
({
grad
,
indices
,
indices_size_
}),
&
unique_sparse_grad
,
var_first_dim_size_
,
var_outer_dim_size_
);
for
(
size_t
i
=
0
;
i
<
unique_sparse_grad
.
indices_size_
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size_
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size_
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size_
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size_
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
auto
accum_new
=
accum
[
j
]
+
summed_grad
*
summed_grad
;
if
(
lr_power_
==
-
0.5
)
{
linear
[
j
]
+=
summed_grad
-
(
std
::
sqrt
(
accum_new
)
-
std
::
sqrt
(
accum
[
j
]))
/
lr_
*
var
[
j
];
}
else
{
linear
[
j
]
+=
summed_grad
-
(
std
::
pow
(
accum_new
,
-
lr_power_
)
-
std
::
pow
(
accum
[
j
],
-
lr_power_
))
/
lr_
*
var
[
j
];
}
auto
x
=
Sign
(
linear
[
j
])
*
l1_
-
linear
[
j
];
float
y
;
if
(
lr_power_
==
-
0.5
)
{
y
=
std
::
sqrt
(
accum_new
)
/
lr_
+
2
*
l2_
;
}
else
{
y
=
std
::
pow
(
accum_new
,
-
lr_power_
)
/
lr_
+
2
*
l2_
;
}
auto
pre_shrink
=
x
/
y
;
var
[
j
]
=
std
::
fabs
(
linear
[
j
])
>
l1_
?
pre_shrink
:
0
;
accum
[
j
]
=
accum_new
;
}
}
MultiThreadComputeParams
input_params
;
input_params
.
var_
=
var
;
input_params
.
accum_
=
accum
;
input_params
.
linear_
=
linear
;
input_params
.
lr_
=
lr_
;
input_params
.
l1_
=
l1_
;
input_params
.
l2_
=
l2_
;
input_params
.
lr_power_
=
lr_power_
;
input_params
.
sparse_grad_
=
unique_sparse_grad
;
input_params
.
var_first_dim_size_
=
var_first_dim_size_
;
input_params
.
var_outer_dim_size_
=
var_outer_dim_size_
;
const
size_t
kThreadNum
=
16
;
MultiThreadCompute
(
ComputeFtrl
,
&
input_params
,
kThreadNum
,
unique_sparse_grad
.
indices_size_
);
return
true
;
}
}
// namespace kernel
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc
浏览文件 @
12e7ddae
...
...
@@ -21,6 +21,39 @@ namespace mindspore {
namespace
kernel
{
namespace
{
constexpr
size_t
kSparseApplyLazyAdamInputSize
=
11
;
void
ComputeLazyAdam
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
var
=
input_params
->
var_
;
auto
m
=
input_params
->
m_
;
auto
v
=
input_params
->
v_
;
auto
lr
=
input_params
->
lr_
;
auto
beta1
=
input_params
->
beta1_
;
auto
beta2
=
input_params
->
beta2_
;
auto
epsilon
=
input_params
->
epsilon_
;
auto
use_nesterov
=
input_params
->
use_nesterov_
;
auto
unique_sparse_grad
=
input_params
->
sparse_grad_
;
auto
var_first_dim_size
=
input_params
->
var_first_dim_size_
;
auto
var_outer_dim_size
=
input_params
->
var_outer_dim_size_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range"
;
}
size_t
start_index
=
var_outer_dim_size
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
m
[
j
]
=
beta1
*
m
[
j
]
+
(
1
-
beta1
)
*
summed_grad
;
v
[
j
]
=
beta2
*
v
[
j
]
+
(
1
-
beta2
)
*
summed_grad
*
summed_grad
;
if
(
use_nesterov
)
{
var
[
j
]
-=
lr
*
(
m
[
j
]
*
beta1
+
(
1
-
beta1
)
*
summed_grad
)
/
(
std
::
sqrt
(
v
[
j
])
+
epsilon
);
}
else
{
var
[
j
]
-=
lr
*
m
[
j
]
/
(
std
::
sqrt
(
v
[
j
])
+
epsilon
);
}
}
}
}
}
// namespace
void
SparseApplyLazyAdamCPUKernel
::
InitInputOutputSize
(
const
CNodePtr
&
kernel_node
)
{
...
...
@@ -94,24 +127,20 @@ bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr>
var_outer_dim_size_
);
lr
=
lr
*
std
::
sqrt
(
1
-
beta2_power
)
/
(
1
-
beta1_power
);
for
(
size_t
i
=
0
;
i
<
unique_sparse_grad
.
indices_size_
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size_
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range"
;
}
size_t
start_index
=
var_outer_dim_size_
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size_
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size_
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
m
[
j
]
=
beta1
*
m
[
j
]
+
(
1
-
beta1
)
*
summed_grad
;
v
[
j
]
=
beta2
*
v
[
j
]
+
(
1
-
beta2
)
*
summed_grad
*
summed_grad
;
if
(
use_nesterov_
)
{
var
[
j
]
-=
lr
*
(
m
[
j
]
*
beta1
+
(
1
-
beta1
)
*
summed_grad
)
/
(
std
::
sqrt
(
v
[
j
])
+
epsilon
);
}
else
{
var
[
j
]
-=
lr
*
m
[
j
]
/
(
std
::
sqrt
(
v
[
j
])
+
epsilon
);
}
}
}
MultiThreadComputeParams
input_params
;
input_params
.
var_
=
var
;
input_params
.
m_
=
m
;
input_params
.
v_
=
v
;
input_params
.
lr_
=
lr
;
input_params
.
beta1_
=
beta1
;
input_params
.
beta2_
=
beta2
;
input_params
.
epsilon_
=
epsilon
;
input_params
.
use_nesterov_
=
use_nesterov_
;
input_params
.
sparse_grad_
=
unique_sparse_grad
;
input_params
.
var_first_dim_size_
=
var_first_dim_size_
;
input_params
.
var_outer_dim_size_
=
var_outer_dim_size_
;
const
size_t
kThreadNum
=
16
;
MultiThreadCompute
(
ComputeLazyAdam
,
&
input_params
,
kThreadNum
,
unique_sparse_grad
.
indices_size_
);
return
true
;
}
}
// namespace kernel
...
...
mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc
浏览文件 @
12e7ddae
...
...
@@ -21,6 +21,39 @@ namespace mindspore {
namespace
kernel
{
namespace
{
constexpr
size_t
kSparseApplyProximalAdagradInputSize
=
7
;
void
ComputeProximalAdagrad
(
MultiThreadComputeParams
*
input_params
,
size_t
start
,
size_t
end
)
{
MS_EXCEPTION_IF_NULL
(
input_params
);
auto
var
=
input_params
->
var_
;
auto
accum
=
input_params
->
accum_
;
auto
lr
=
input_params
->
lr_
;
auto
l1
=
input_params
->
l1_
;
auto
l2
=
input_params
->
l2_
;
auto
unique_sparse_grad
=
input_params
->
sparse_grad_
;
auto
var_first_dim_size
=
input_params
->
var_first_dim_size_
;
auto
var_outer_dim_size
=
input_params
->
var_outer_dim_size_
;
for
(
size_t
i
=
start
;
i
<
end
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
accum
[
j
]
+=
summed_grad
*
summed_grad
;
auto
learning_rate
=
lr
*
(
1
/
std
::
sqrt
(
accum
[
j
]));
auto
prox_v
=
var
[
j
];
prox_v
-=
summed_grad
*
learning_rate
;
if
(
l1
>
0
)
{
var
[
j
]
=
Sign
(
prox_v
)
*
std
::
fmax
(
std
::
fabs
(
prox_v
)
-
learning_rate
*
l1
,
static_cast
<
float
>
(
0.0
))
/
(
1
+
l2
*
learning_rate
);
}
else
{
var
[
j
]
=
prox_v
/
(
1
+
l2
*
learning_rate
);
}
}
}
}
}
// namespace
void
SparseApplyProximalAdagradCPUKernel
::
InitInputOutputSize
(
const
CNodePtr
&
kernel_node
)
{
...
...
@@ -90,27 +123,17 @@ bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::Addre
ReduceSparseGradient
(
SparseGradient
({
grad
,
indices
,
indices_size_
}),
&
unique_sparse_grad
,
var_first_dim_size_
,
var_outer_dim_size_
);
for
(
size_t
i
=
0
;
i
<
unique_sparse_grad
.
indices_size_
;
++
i
)
{
int
index
=
unique_sparse_grad
.
indices_
[
i
];
if
(
index
<
0
||
IntToSize
(
index
)
>=
var_first_dim_size_
)
{
MS_LOG
(
EXCEPTION
)
<<
"Index "
<<
index
<<
" in indices is out of range after unique process"
;
}
size_t
start_index
=
var_outer_dim_size_
*
index
;
size_t
end_index
=
start_index
+
var_outer_dim_size_
;
for
(
size_t
j
=
start_index
,
k
=
var_outer_dim_size_
*
i
;
j
<
end_index
;
++
j
,
++
k
)
{
auto
summed_grad
=
unique_sparse_grad
.
value_
[
k
];
accum
[
j
]
+=
summed_grad
*
summed_grad
;
auto
learning_rate
=
lr
*
(
1
/
std
::
sqrt
(
accum
[
j
]));
auto
prox_v
=
var
[
j
];
prox_v
-=
summed_grad
*
learning_rate
;
if
(
l1
>
0
)
{
var
[
j
]
=
Sign
(
prox_v
)
*
std
::
fmax
(
std
::
fabs
(
prox_v
)
-
learning_rate
*
l1
,
static_cast
<
float
>
(
0.0
))
/
(
1
+
l2
*
learning_rate
);
}
else
{
var
[
j
]
=
prox_v
/
(
1
+
l2
*
learning_rate
);
}
}
}
MultiThreadComputeParams
input_params
;
input_params
.
var_
=
var
;
input_params
.
accum_
=
accum
;
input_params
.
lr_
=
lr
;
input_params
.
l1_
=
l1
;
input_params
.
l2_
=
l2
;
input_params
.
sparse_grad_
=
unique_sparse_grad
;
input_params
.
var_first_dim_size_
=
var_first_dim_size_
;
input_params
.
var_outer_dim_size_
=
var_outer_dim_size_
;
const
size_t
kThreadNum
=
16
;
MultiThreadCompute
(
ComputeProximalAdagrad
,
&
input_params
,
kThreadNum
,
unique_sparse_grad
.
indices_size_
);
return
true
;
}
}
// namespace kernel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录