Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2b74b739
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2b74b739
编写于
3月 25, 2022
作者:
A
Aganlengzi
提交者:
GitHub
3月 25, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] add merged_momentum (#40875)
* [NPU] add merged_momentum * fix * fix device
上级
139a30ec
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
540 addition
and
0 deletion
+540
-0
paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc
paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc
+167
-0
python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py
.../fluid/tests/unittests/npu/test_merged_momentum_op_npu.py
+373
-0
未找到文件。
paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc
0 → 100644
浏览文件 @
2b74b739
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/optimizers/merged_momentum_op.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {

// NPU kernel for the merged_momentum op.
//
// Compute() first checks that every list-valued input/output/attribute has
// the same length as Input(Param) and that Param/ParamOut and
// Velocity/VelocityOut refer to the same tensors. It then launches one
// "ApplyMomentum" NPU op per parameter. When a parameter's regularization
// method is "l2_decay", the gradient handed to ApplyMomentum is produced by a
// "Muls" (param scaled by the coefficient) followed by an "Add" with the raw
// gradient.
template <typename T>
class NPUMergedMomentumOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // ---- Param / ParamOut -------------------------------------------------
    auto params = ctx.MultiInput<framework::Tensor>("Param");
    auto params_out = ctx.MultiOutput<framework::Tensor>("ParamOut");
    size_t n = params.size();
    PADDLE_ENFORCE_EQ(n, params_out.size(),
                      platform::errors::InvalidArgument(
                          "The size of Output(ParamOut) must be equal to "
                          "Input(Param), but got the size of Output(ParamOut) "
                          "is %d, the size of Input(Param) is %d.",
                          params_out.size(), n));
    for (size_t i = 0; i < n; ++i) {
      // Pointer comparison: the update is expected to run in place.
      PADDLE_ENFORCE_EQ(params[i], params_out[i],
                        platform::errors::InvalidArgument(
                            "Input(Param) and Output(ParamOut) "
                            "must be the same Tensors."));
    }

    // ---- Grad -------------------------------------------------------------
    auto grads = ctx.MultiInput<framework::Tensor>("Grad");
    PADDLE_ENFORCE_EQ(
        n, grads.size(),
        platform::errors::InvalidArgument(
            "The size of Input(Grad) must be equal to Input(Param), but got "
            "the size of Input(Grad) is %d, the size of Input(Param) is %d.",
            grads.size(), n));

    // ---- Velocity / VelocityOut ---------------------------------------------
    auto velocitys = ctx.MultiInput<framework::Tensor>("Velocity");
    PADDLE_ENFORCE_EQ(n, velocitys.size(),
                      platform::errors::InvalidArgument(
                          "The size of Input(Velocity) must be equal to "
                          "Input(Param), but got the size of Input(Velocity) "
                          "is %d, the size of Input(Param) is %d.",
                          velocitys.size(), n));

    auto velocitys_out = ctx.MultiOutput<framework::Tensor>("VelocityOut");
    PADDLE_ENFORCE_EQ(
        n, velocitys_out.size(),
        platform::errors::InvalidArgument(
            "The size of Output(VelocityOut) must be "
            "equal to Input(Param), but got the size of Output(VelocityOut) is "
            "%d, the size of Input(Param) is %d.",
            velocitys_out.size(), n));
    for (size_t i = 0; i < n; ++i) {
      PADDLE_ENFORCE_EQ(velocitys[i], velocitys_out[i],
                        platform::errors::InvalidArgument(
                            "Input(Velocity) and Output(VelocityOut) must be "
                            "the same Tensors."));
    }

    // ---- Scalars and learning rate(s) ---------------------------------------
    T mu = static_cast<T>(ctx.Attr<float>("mu"));

    // Either a single learning rate shared by all parameters, or one per
    // parameter.
    auto lrs = ctx.MultiInput<framework::Tensor>("LearningRate");
    if (lrs.size() != 1) {
      PADDLE_ENFORCE_EQ(
          n, lrs.size(),
          platform::errors::InvalidArgument(
              "If the size of Input(LearningRate) is not 1, the size of "
              "Input(LearningRate) must be "
              "equal to Input(Param), but got the size of Input(LearningRate) "
              "is %d, the size of Input(Param) is %d.",
              lrs.size(), n));
    }
    auto use_nesterov = ctx.Attr<bool>("use_nesterov");

    // ---- Regularization attributes ------------------------------------------
    auto regularization_methods =
        ctx.Attr<std::vector<std::string>>("regularization_method");
    auto regularization_coeffs =
        ctx.Attr<std::vector<float>>("regularization_coeff");
    // Both lists are only length-checked when a method list was supplied.
    if (regularization_methods.size() != 0) {
      PADDLE_ENFORCE_EQ(
          n, regularization_methods.size(),
          platform::errors::InvalidArgument(
              "The size of Attr(regularization_method) must be equal "
              "to Input(Param), but got the size of "
              "Attr(regularization_method) is %d, the size of Input(Param) is "
              "%d.",
              regularization_methods.size(), n));
      PADDLE_ENFORCE_EQ(
          n, regularization_coeffs.size(),
          platform::errors::InvalidArgument(
              "The size of Attr(regularization_coeff) must be equal "
              "to Input(Param), but got the size of Attr(regularization_coeff) "
              "is %d, the size of Input(Param) is %d.",
              regularization_coeffs.size(), n));
    }

    VLOG(5) << "use_nesterov: " << use_nesterov
            << ", regularization_methods.size(): "
            << regularization_methods.size()
            << ", regularization_coeffs.size(): "
            << regularization_coeffs.size();

    auto& dev_ctx = ctx.template device_context<platform::NPUDeviceContext>();

    // One-element tensor holding mu; it is passed as an input to every
    // ApplyMomentum launch below.
    Tensor mu_tensor;
    mu_tensor.mutable_data<T>(phi::make_ddim({1}), ctx.GetPlace());
    FillNpuTensorWithConstant<T>(&mu_tensor, mu);

    for (size_t idx = 0; idx < n; ++idx) {
      RegularizationType regularization_flag =
          regularization_methods.size() > 0 &&
                  regularization_methods[idx] == "l2_decay"
              ? RegularizationType::kL2DECAY
              : RegularizationType::kNONE;

      auto learning_rate = lrs.size() > 1 ? lrs[idx] : lrs[0];
      auto param = params[idx];
      auto param_out = params_out[idx];
      auto velocity = velocitys[idx];
      auto velocity_out = velocitys_out[idx];

      auto grad = grads[idx];
      Tensor regularized_grad;
      if (regularization_flag == RegularizationType::kL2DECAY) {
        // The coefficient is read only on this path. Reaching it implies the
        // method list is non-empty, so the coefficient list was verified above
        // to hold exactly n entries. Reading it unconditionally would index
        // an unchecked (possibly shorter) list when no methods were given.
        const float regularization_coeff = regularization_coeffs[idx];

        regularized_grad.mutable_data<T>(grad->dims(), ctx.GetPlace());
        // regularized_grad = param scaled by the coefficient ...
        const auto& runner1 = NpuOpRunner("Muls", {*param}, {regularized_grad},
                                          {{"value", regularization_coeff}});
        runner1.Run(dev_ctx.stream());
        // ... plus the raw gradient.
        const auto& runner2 = NpuOpRunner("Add", {regularized_grad, *grad},
                                          {regularized_grad}, {});
        runner2.Run(dev_ctx.stream());
      } else {
        // No regularization: alias the gradient buffer instead of copying.
        regularized_grad.ShareDataWith(*grad);
      }
      framework::TensorCopy(*param, ctx.GetPlace(), dev_ctx, param_out);
      framework::TensorCopy(*velocity, ctx.GetPlace(), dev_ctx, velocity_out);
      // NOTE: ApplyMomentum will change the input
      const auto& runner = NpuOpRunner(
          "ApplyMomentum", {*param_out, *velocity_out, *learning_rate,
                            regularized_grad, mu_tensor},
          {*param_out}, {{"use_nesterov", use_nesterov}});
      runner.Run(dev_ctx.stream());
    }
  }
};

}  // namespace operators
}  // namespace paddle
// Short aliases used by the kernel registration below.
namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the NPU kernel of merged_momentum for float and float16 elements.
REGISTER_OP_NPU_KERNEL(merged_momentum,
                       ops::NPUMergedMomentumOpKernel<float>,
                       ops::NPUMergedMomentumOpKernel<plat::float16>);
python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py
0 → 100644
浏览文件 @
2b74b739
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
sys
sys
.
path
.
append
(
'..'
)
import
unittest
import
paddle
import
numpy
as
np
from
paddle.fluid.layer_helper
import
LayerHelper
from
collections
import
OrderedDict
def run_momentum_op(params,
                    grads,
                    velocitys,
                    master_params,
                    learning_rate,
                    place,
                    multi_precision,
                    mu=0.9,
                    rescale_grad=0.01,
                    use_merged=False):
    """Build and run a static program that applies a momentum update.

    With ``use_merged=True`` a single ``merged_momentum`` op receives all
    parameters at once; otherwise one ``momentum`` op is appended per
    parameter. Parameters and velocities are updated in place (the same
    variables are used as inputs and outputs).

    Args:
        params, grads, velocitys: equally long sequences of arrays.
        master_params: sequence of arrays (same length as ``params``) when
            ``multi_precision`` is true; unused otherwise.
        learning_rate: array fed to the ``LearningRate`` input.
        place: place the executor runs on.
        multi_precision: whether MasterParam inputs/outputs are wired in.
        mu, rescale_grad: forwarded as op attributes.
        use_merged: selects ``merged_momentum`` instead of ``momentum``.

    Returns:
        The executor's fetch results: all parameters, then all velocities,
        followed by the master parameters only when ``multi_precision`` is
        true and ``place`` is a ``paddle.CUDAPlace``.
    """
    assert len(params) == len(grads)
    assert len(params) == len(velocitys)
    if multi_precision:
        assert len(params) == len(master_params)

    op_type = 'merged_momentum' if use_merged else 'momentum'
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        # locals() forwards every name bound so far as a keyword argument, so
        # do not add or rename locals above this line without checking which
        # kwargs LayerHelper reacts to.
        helper = LayerHelper(op_type, **locals())
        op_attrs = {
            'mu': mu,
            'multi_precision': multi_precision,
            'rescale_grad': rescale_grad,
        }

        def persistable_like(array):
            # Persistable variable with the array's shape and dtype.
            return helper.create_variable(
                persistable=True, shape=array.shape, dtype=array.dtype)

        param_vars = [persistable_like(array) for array in params]
        grad_vars = [
            helper.create_variable(shape=array.shape, dtype=array.dtype)
            for array in grads
        ]
        velocity_vars = [persistable_like(array) for array in velocitys]
        lr_var = persistable_like(learning_rate)

        feed = OrderedDict()
        for var, value in zip(param_vars, params):
            feed[var.name] = value
        for var, value in zip(velocity_vars, velocitys):
            feed[var.name] = value
        # Only parameters and velocities have been fed at this point, so this
        # snapshot is exactly the set of updated variables to fetch.
        fetch_names = list(feed.keys())

        for var, value in zip(grad_vars, grads):
            feed[var.name] = value
        feed[lr_var.name] = learning_rate

        master_param_vars = None
        if multi_precision:
            master_param_vars = [
                persistable_like(array) for array in master_params
            ]
            for var, value in zip(master_param_vars, master_params):
                feed[var.name] = value
            # CPUPlace does not use MasterParam
            if isinstance(place, paddle.CUDAPlace):
                fetch_names = fetch_names + [
                    var.name for var in master_param_vars
                ]

        if use_merged:
            # One op that takes the complete variable lists.
            op_inputs = {
                'Param': param_vars,
                'Grad': grad_vars,
                'Velocity': velocity_vars,
                'LearningRate': lr_var,
            }
            op_outputs = {
                'ParamOut': param_vars,
                'VelocityOut': velocity_vars,
            }
            if multi_precision:
                op_inputs['MasterParam'] = master_param_vars
                op_outputs['MasterParamOut'] = master_param_vars
            helper.append_op(
                type=op_type,
                inputs=op_inputs,
                outputs=op_outputs,
                attrs=op_attrs)
        else:
            # One op per (param, grad, velocity) triple.
            triples = zip(param_vars, grad_vars, velocity_vars)
            for index, (p_var, g_var, v_var) in enumerate(triples):
                op_inputs = {
                    'Param': p_var,
                    'Grad': g_var,
                    'Velocity': v_var,
                    'LearningRate': lr_var,
                }
                op_outputs = {'ParamOut': p_var, 'VelocityOut': v_var}
                if multi_precision:
                    op_inputs['MasterParam'] = master_param_vars[index]
                    op_outputs['MasterParamOut'] = master_param_vars[index]
                helper.append_op(
                    type=op_type,
                    inputs=op_inputs,
                    outputs=op_outputs,
                    attrs=op_attrs)

    executor = paddle.static.Executor(place)
    # Run in a fresh scope so repeated calls do not share variables.
    with paddle.static.scope_guard(paddle.static.Scope()):
        executor.run(startup)
        return executor.run(main, feed=feed, fetch_list=fetch_names)
def run_momentum_op2(params,
                     grads,
                     velocitys,
                     master_params,
                     learning_rate,
                     place,
                     multi_precision,
                     mu=0.9,
                     rescale_grad=0.01,
                     use_merged=False,
                     use_nesterov=True):
    """Build and run a momentum update with L2 decay and optional Nesterov.

    Same program layout as a plain momentum run, but every op additionally
    carries ``use_nesterov`` plus an ``l2_decay`` regularization with
    coefficient 2.0. The per-parameter ``momentum`` op receives these as
    scalars; the ``merged_momentum`` op receives one list entry per
    parameter.

    Args:
        params, grads, velocitys: equally long sequences of arrays.
        master_params: sequence of arrays (same length as ``params``) when
            ``multi_precision`` is true; unused otherwise.
        learning_rate: array fed to the ``LearningRate`` input.
        place: place the executor runs on.
        multi_precision: whether MasterParam inputs/outputs are wired in.
        mu, rescale_grad, use_nesterov: forwarded as op attributes.
        use_merged: selects ``merged_momentum`` instead of ``momentum``.

    Returns:
        The executor's fetch results: all parameters, then all velocities,
        followed by the master parameters only when ``multi_precision`` is
        true and ``place`` is a ``paddle.CUDAPlace``.
    """
    assert len(params) == len(grads)
    assert len(params) == len(velocitys)
    if multi_precision:
        assert len(params) == len(master_params)

    op_type = 'merged_momentum' if use_merged else 'momentum'
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        # locals() forwards every name bound so far as a keyword argument, so
        # do not add or rename locals above this line without checking which
        # kwargs LayerHelper reacts to.
        helper = LayerHelper(op_type, **locals())

        def persistable_like(array):
            # Persistable variable with the array's shape and dtype.
            return helper.create_variable(
                persistable=True, shape=array.shape, dtype=array.dtype)

        param_vars = [persistable_like(array) for array in params]
        grad_vars = [
            helper.create_variable(shape=array.shape, dtype=array.dtype)
            for array in grads
        ]
        velocity_vars = [persistable_like(array) for array in velocitys]
        lr_var = persistable_like(learning_rate)

        feed = OrderedDict()
        for var, value in zip(param_vars, params):
            feed[var.name] = value
        for var, value in zip(velocity_vars, velocitys):
            feed[var.name] = value
        # Only parameters and velocities have been fed at this point, so this
        # snapshot is exactly the set of updated variables to fetch.
        fetch_names = list(feed.keys())

        for var, value in zip(grad_vars, grads):
            feed[var.name] = value
        feed[lr_var.name] = learning_rate

        master_param_vars = None
        if multi_precision:
            master_param_vars = [
                persistable_like(array) for array in master_params
            ]
            for var, value in zip(master_param_vars, master_params):
                feed[var.name] = value
            # CPUPlace does not use MasterParam
            if isinstance(place, paddle.CUDAPlace):
                fetch_names = fetch_names + [
                    var.name for var in master_param_vars
                ]

        if use_merged:
            # One op for all parameters; regularization attrs are per-param
            # lists here.
            count = len(param_vars)
            op_inputs = {
                'Param': param_vars,
                'Grad': grad_vars,
                'Velocity': velocity_vars,
                'LearningRate': lr_var,
            }
            op_outputs = {
                'ParamOut': param_vars,
                'VelocityOut': velocity_vars,
            }
            if multi_precision:
                op_inputs['MasterParam'] = master_param_vars
                op_outputs['MasterParamOut'] = master_param_vars
            op_attrs = {
                'mu': mu,
                'multi_precision': multi_precision,
                'rescale_grad': rescale_grad,
                'use_nesterov': use_nesterov,
                'regularization_method': ['l2_decay'] * count,
                'regularization_coeff': [2.0] * count,
            }
            helper.append_op(
                type=op_type,
                inputs=op_inputs,
                outputs=op_outputs,
                attrs=op_attrs)
        else:
            # One op per (param, grad, velocity) triple; regularization attrs
            # are scalars here.
            triples = zip(param_vars, grad_vars, velocity_vars)
            for index, (p_var, g_var, v_var) in enumerate(triples):
                op_inputs = {
                    'Param': p_var,
                    'Grad': g_var,
                    'Velocity': v_var,
                    'LearningRate': lr_var,
                }
                op_outputs = {'ParamOut': p_var, 'VelocityOut': v_var}
                if multi_precision:
                    op_inputs['MasterParam'] = master_param_vars[index]
                    op_outputs['MasterParamOut'] = master_param_vars[index]
                op_attrs = {
                    'mu': mu,
                    'multi_precision': multi_precision,
                    'rescale_grad': rescale_grad,
                    'use_nesterov': use_nesterov,
                    'regularization_method': 'l2_decay',
                    'regularization_coeff': 2.0,
                }
                helper.append_op(
                    type=op_type,
                    inputs=op_inputs,
                    outputs=op_outputs,
                    attrs=op_attrs)

    executor = paddle.static.Executor(place)
    # Run in a fresh scope so repeated calls do not share variables.
    with paddle.static.scope_guard(paddle.static.Scope()):
        executor.run(startup)
        return executor.run(main, feed=feed, fetch_list=fetch_names)
class
TestMergedMomentum
(
unittest
.
TestCase
):
def
setUp
(
self
):
paddle
.
enable_static
()
self
.
shapes
=
[[
3
,
4
],
[
2
,
7
],
[
5
,
6
],
[
7
,
8
]]
self
.
seed
=
10
self
.
place
=
paddle
.
fluid
.
NPUPlace
(
0
)
self
.
__class__
.
use_npu
=
True
def
gen_rand_data
(
self
,
shapes
,
dtype
):
return
[
np
.
random
.
random
(
s
).
astype
(
dtype
)
for
s
in
shapes
]
def
prepare_data
(
self
,
shapes
,
multi_precision
,
seed
,
place
):
np
.
random
.
seed
(
seed
)
mp_dtype
=
np
.
float32
dtype
=
np
.
float32
params
=
self
.
gen_rand_data
(
shapes
,
dtype
)
grads
=
self
.
gen_rand_data
(
shapes
,
dtype
)
velocitys
=
self
.
gen_rand_data
(
shapes
,
mp_dtype
)
learning_rate
=
self
.
gen_rand_data
([[
1
]],
mp_dtype
)[
0
]
if
multi_precision
:
master_params
=
[
p
.
astype
(
mp_dtype
)
for
p
in
params
]
else
:
master_params
=
None
return
params
,
grads
,
velocitys
,
master_params
,
learning_rate
def
check_with_place
(
self
,
place
,
multi_precision
):
params
,
grads
,
velocitys
,
master_params
,
learning_rate
=
self
.
prepare_data
(
self
.
shapes
,
multi_precision
,
self
.
seed
,
place
)
def
run_op
(
use_merged
):
# NPU Momentum Op does not support rescale_grad
rescale_grad
=
1.0
return
run_momentum_op
(
params
,
grads
,
velocitys
,
master_params
,
learning_rate
,
place
,
multi_precision
,
rescale_grad
=
rescale_grad
,
use_merged
=
use_merged
)
outs1
=
run_op
(
True
)
outs2
=
run_op
(
False
)
self
.
assertEqual
(
len
(
outs1
),
len
(
outs2
))
for
i
,
(
out1
,
out2
)
in
enumerate
(
zip
(
outs1
,
outs2
)):
self
.
assertTrue
(
np
.
allclose
(
out1
,
out2
,
atol
=
1e-7
))
def
test_main
(
self
):
self
.
check_with_place
(
self
.
place
,
multi_precision
=
False
)
class
TestMergedMomentum2
(
unittest
.
TestCase
):
def
setUp
(
self
):
paddle
.
enable_static
()
self
.
shapes
=
[[
3
,
4
],
[
2
,
7
],
[
5
,
6
],
[
7
,
8
]]
self
.
seed
=
10
self
.
place
=
paddle
.
fluid
.
NPUPlace
(
0
)
self
.
__class__
.
use_npu
=
True
def
gen_rand_data
(
self
,
shapes
,
dtype
):
return
[
np
.
random
.
random
(
s
).
astype
(
dtype
)
for
s
in
shapes
]
def
prepare_data
(
self
,
shapes
,
multi_precision
,
seed
,
place
):
np
.
random
.
seed
(
seed
)
mp_dtype
=
np
.
float32
dtype
=
np
.
float32
# np.float16
params
=
self
.
gen_rand_data
(
shapes
,
dtype
)
grads
=
self
.
gen_rand_data
(
shapes
,
dtype
)
velocitys
=
self
.
gen_rand_data
(
shapes
,
mp_dtype
)
learning_rate
=
self
.
gen_rand_data
([[
1
]],
mp_dtype
)[
0
]
if
multi_precision
:
master_params
=
[
p
.
astype
(
mp_dtype
)
for
p
in
params
]
else
:
master_params
=
None
return
params
,
grads
,
velocitys
,
master_params
,
learning_rate
def
check_with_place
(
self
,
place
,
multi_precision
):
params
,
grads
,
velocitys
,
master_params
,
learning_rate
=
self
.
prepare_data
(
self
.
shapes
,
multi_precision
,
self
.
seed
,
place
)
def
run_op
(
use_nesterov
,
use_merged
):
# NPU Momentum Op does not support rescale_grad
rescale_grad
=
1.0
return
run_momentum_op2
(
params
,
grads
,
velocitys
,
master_params
,
learning_rate
,
place
,
multi_precision
,
rescale_grad
=
rescale_grad
,
use_merged
=
use_merged
,
use_nesterov
=
use_nesterov
)
outs1
=
run_op
(
use_nesterov
=
True
,
use_merged
=
True
)
outs2
=
run_op
(
use_nesterov
=
True
,
use_merged
=
False
)
self
.
assertEqual
(
len
(
outs1
),
len
(
outs2
))
for
i
,
(
out1
,
out2
)
in
enumerate
(
zip
(
outs1
,
outs2
)):
self
.
assertTrue
(
np
.
allclose
(
out1
,
out2
,
atol
=
1e-7
))
outs3
=
run_op
(
use_nesterov
=
False
,
use_merged
=
True
)
outs4
=
run_op
(
use_nesterov
=
False
,
use_merged
=
False
)
self
.
assertEqual
(
len
(
outs3
),
len
(
outs4
))
for
j
,
(
out3
,
out4
)
in
enumerate
(
zip
(
outs3
,
outs4
)):
self
.
assertTrue
(
np
.
allclose
(
out3
,
out4
,
atol
=
1e-7
))
def
test_main
(
self
):
self
.
check_with_place
(
self
.
place
,
multi_precision
=
False
)
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录