Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
766bd529
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
766bd529
编写于
9月 24, 2019
作者:
J
jhjiangcs
提交者:
zhang wenhui
9月 24, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add optimizer:dpsgd,test=develop (#19915)
上级
37f76407
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
431 addition
and
7 deletion
+431
-7
paddle/fluid/API.spec
paddle/fluid/API.spec
+8
-0
paddle/fluid/operators/optimizers/dpsgd_op.cc
paddle/fluid/operators/optimizers/dpsgd_op.cc
+107
-0
paddle/fluid/operators/optimizers/dpsgd_op.h
paddle/fluid/operators/optimizers/dpsgd_op.h
+114
-0
python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
+1
-0
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+87
-7
python/paddle/fluid/tests/unittests/test_dpsgd_op.py
python/paddle/fluid/tests/unittests/test_dpsgd_op.py
+73
-0
python/paddle/fluid/tests/unittests/test_optimizer.py
python/paddle/fluid/tests/unittests/test_optimizer.py
+41
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
766bd529
...
...
@@ -936,6 +936,14 @@ paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', '
paddle.fluid.optimizer.AdamaxOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamaxOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
paddle.fluid.optimizer.DpsgdOptimizer ('paddle.fluid.optimizer.DpsgdOptimizer', ('document', '71113c30b66c0f4035b10ebd8af8c5ad'))
paddle.fluid.optimizer.DpsgdOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'clip', 'batch_size', 'sigma'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DpsgdOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
paddle.fluid.optimizer.DpsgdOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
paddle.fluid.optimizer.DpsgdOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.DpsgdOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DpsgdOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.DpsgdOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
paddle.fluid.optimizer.DecayedAdagradOptimizer ('paddle.fluid.optimizer.DecayedAdagradOptimizer', ('document', 'e76838a8586bf2e58e6b5cdd2f67f780'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
...
...
paddle/fluid/operators/optimizers/dpsgd_op.cc
0 → 100644
浏览文件 @
766bd529
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/optimizers/dpsgd_op.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
// Operator definition for the Dpsgd (differentially private SGD) update.
//
// Inputs:  Param, Grad, LearningRate.
// Output:  ParamOut, which gets the same dimensions as Param.
class DpsgdOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that every input/output is present, that Param and Grad are
  // LoDTensor variables of identical dimensions, and that LearningRate holds
  // exactly one element; then sets the dimensions of ParamOut.
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
                      "Input(Param) of DpsgdOp should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
                      "Input(Grad) of DpsgdOp should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
                      "Input(LearningRate) of DpsgdOp should not be null.");

    // Two values are reported (variable name and actual type), so the format
    // string carries one conversion specifier for each of them.
    PADDLE_ENFORCE_EQ(
        ctx->GetInputsVarType("Param").front(),
        framework::proto::VarType::LOD_TENSOR,
        "The input var(%s)'s type should be LoDTensor, "
        "but the received is %s",
        ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front());
    PADDLE_ENFORCE_EQ(
        ctx->GetInputsVarType("Grad").front(),
        framework::proto::VarType::LOD_TENSOR,
        "The input var(%s)'s type should be LoDTensor, "
        "but the received is %s",
        ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front());

    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
                      "Output(ParamOut) of DpsgdOp should not be null.");

    // The check is on the number of elements, not on the rank.
    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                      "Learning rate should have 1 element");

    auto param_dims = ctx->GetInputDim("Param");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Grad"),
        "Param and Grad input of DpsgdOp should have same dimension");

    ctx->SetOutputDim("ParamOut", param_dims);
  }

  // The kernel is selected by the data type of Param and the execution place.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(ctx.Input<Tensor>("Param")->type(),
                                   ctx.GetPlace());
  }
};
// Declares the inputs, output, attributes and documentation of the dpsgd op.
class DpsgdOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("Grad", "(Tensor) Input gradient");
    AddInput("LearningRate", "(Tensor) Learning rate");

    AddOutput("ParamOut", "(Tensor) Output parameter");

    // The descriptions below state the defaults actually set by SetDefault
    // and the role each attribute plays in the kernel's update formula.
    AddAttr<float>("clip",
                   "(float, default 10.0) "
                   "Threshold for the L2 norm of the gradient; a gradient "
                   "with a larger norm is scaled down to this norm.")
        .SetDefault(10.0f);
    AddAttr<float>("batch_size",
                   "(float, default 16.0) "
                   "Batch size; the gaussian noise is divided by it "
                   "before being added to the clipped gradient.")
        .SetDefault(16.0f);
    AddAttr<float>("sigma",
                   "(float, default 1.0) "
                   "Standard deviation of the gaussian noise.")
        .SetDefault(1.0f);

    AddComment(R"DOC(
Dpsgd Optimizer.

We implement the Dpsgd optimizer according to CCS16 paper -
Deep Learning with Differential Privacy.

Dpsgd updates:

  scale     = max(1, ||grad||_2 / clip)
  noise     = sigma * z, where z is one sample drawn from N(0, 1)
  param_out = param - learning_rate * (grad / scale + noise / batch_size)

CCS16 - Deep Learning with Differential Privacy.
[https://arxiv.org/abs/1607.00133]
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;
// Register the dpsgd operator together with its proto maker; the macro used
// here registers it without a gradient operator.
REGISTER_OP_WITHOUT_GRADIENT(dpsgd, ops::DpsgdOp, ops::DpsgdOpMaker);
// Register the CPU kernels of dpsgd for float and double element types.
REGISTER_OP_CPU_KERNEL(
    dpsgd, ops::DpsgdOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::DpsgdOpKernel<paddle::platform::CPUDeviceContext, double>);
paddle/fluid/operators/optimizers/dpsgd_op.h
0 → 100644
浏览文件 @
766bd529
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <math.h>
#include <stdlib.h>
#include <cmath>
#include <ctime>
#include <iostream>
#include <random>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
// Kernel performing one Dpsgd step on dense tensors
// (CCS16 - Deep Learning with Differential Privacy,
// https://arxiv.org/abs/1607.00133):
//
//   scale     = max(1, ||grad||_2 / clip)
//   noise     = sigma * z, with z one standard-normal sample
//   param_out = param - lr * (grad / scale + noise / batch_size)
//
// A single noise sample is drawn per invocation and shared by all elements.
template <typename DeviceContext, typename T>
class DpsgdOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    // Only LoDTensor variables are handled by this kernel.
    const auto *param_var = ctx.InputVar("Param");
    PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
                      "The Var(%s)'s type should be LoDTensor, "
                      "but the received is %s",
                      ctx.Inputs("Param").front(),
                      framework::ToTypeName(param_var->Type()));

    const auto *grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true,
                      "The Var(%s)'s type should be LoDTensor, "
                      "but the received is %s",
                      ctx.Inputs("Grad").front(),
                      framework::ToTypeName(grad_var->Type()));

    const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
    const auto *param = ctx.Input<framework::Tensor>("Param");
    const auto *grad = ctx.Input<framework::Tensor>("Grad");
    auto *param_out = ctx.Output<framework::Tensor>("ParamOut");

    // All three tensors must hold the same number of elements.
    auto sz = param_out->numel();
    PADDLE_ENFORCE_EQ(param->numel(), sz);
    PADDLE_ENFORCE_EQ(grad->numel(), sz);

    const T *lr = learning_rate->data<T>();
    const T *param_data = param->data<T>();
    const T *grad_data = grad->data<T>();
    T *out_data = param_out->mutable_data<T>(ctx.GetPlace());

    T clip = static_cast<T>(ctx.Attr<float>("clip"));
    T batch_size = static_cast<T>(ctx.Attr<float>("batch_size"));
    T sigma = static_cast<T>(ctx.Attr<float>("sigma"));

    // Gradient clipping: dividing the gradient by `scale` caps its L2 norm
    // at `clip`. The norm is accumulated in T so that the double kernel does
    // not lose precision by going through float.
    T l2_norm = static_cast<T>(0);
    for (int64_t i = 0; i < sz; ++i) {
      l2_norm += grad_data[i] * grad_data[i];
    }
    l2_norm = std::sqrt(l2_norm);
    const T scale = (l2_norm > clip) ? l2_norm / clip : static_cast<T>(1);

    // Gaussian noise via the Marsaglia polar method (the rejection-sampling
    // form of the Box-Muller transform):
    // [https://en.wikipedia.org/wiki/Marsaglia_polar_method]
    // NOTE(review): the engine is seeded from the wall clock, so invocations
    // within the same second start from the same seed.
    const unsigned seed = static_cast<unsigned>(std::time(nullptr));
    std::minstd_rand engine;
    engine.seed(seed);
    std::uniform_real_distribution<T> dist(0.0, 1.0);

    T v1, v2, s;
    do {
      // Pick a point uniformly in the square [-1, 1) x [-1, 1) and retry
      // until it lies strictly inside the unit circle, excluding the origin.
      v1 = 2 * dist(engine) - 1;
      v2 = 2 * dist(engine) - 1;
      s = v1 * v1 + v2 * v2;
    } while (s >= 1 || s == 0);
    const T standard_normal = v1 * std::sqrt(-2 * std::log(s) / s);

    // Zero-mean noise with standard deviation sigma, averaged over the batch.
    const T gaussian_noise = standard_normal * sigma;
    const T noise_term = gaussian_noise / batch_size;

    // update parameters
    for (int64_t i = 0; i < sz; ++i) {
      out_data[i] =
          param_data[i] - lr[0] * (grad_data[i] / scale + noise_term);
    }
  }
};
// class
}
// namespace operators
}
// namespace paddle
python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
浏览文件 @
766bd529
...
...
@@ -35,6 +35,7 @@ OPTIMIZER_OPS = [
'adagrad'
,
'adam'
,
'adamax'
,
'dpsgd'
,
'decayed_adagrad'
,
'adadelta'
,
'rmsprop'
,
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
766bd529
...
...
@@ -39,13 +39,13 @@ from .wrapped_decorator import signature_safe_contextmanager
from
..
import
compat
as
cpt
# Names exported by ``from <this module> import *``. Each entry is listed
# exactly once and every string literal is kept whole on one line.
__all__ = [
    'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
    'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer',
    'AdamOptimizer', 'AdamaxOptimizer', 'DpsgdOptimizer',
    'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'FtrlOptimizer', 'Adadelta',
    'ModelAverage', 'LarsMomentum', 'LarsMomentumOptimizer',
    'DGCMomentumOptimizer', 'LambOptimizer', 'ExponentialMovingAverage',
    'PipelineOptimizer', 'LookaheadOptimizer', 'RecomputeOptimizer'
]
...
...
@@ -1605,6 +1605,85 @@ class AdamaxOptimizer(Optimizer):
stop_gradient
=
True
)
class DpsgdOptimizer(Optimizer):
    """
    Dpsgd optimizer, implemented according to the CCS16 paper -
    Deep Learning with Differential Privacy.

    Each parameter is updated by appending one ``dpsgd`` op that receives the
    parameter, its gradient and the learning rate, together with the
    ``clip``, ``batch_size`` and ``sigma`` attributes given here.

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          import numpy

          # First create the Executor.
          place = fluid.CPUPlace() # fluid.CUDAPlace(0)
          exe = fluid.Executor(place)

          train_program = fluid.Program()
          startup_program = fluid.Program()
          with fluid.program_guard(train_program, startup_program):
              data = fluid.layers.data(name='X', shape=[1], dtype='float32')
              hidden = fluid.layers.fc(input=data, size=10)
              loss = fluid.layers.mean(hidden)
              optimizer = fluid.optimizer.Dpsgd(learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
              optimizer.minimize(loss)

          # Run the startup program once and only once.
          exe.run(startup_program)

          x = numpy.random.random(size=(10, 1)).astype('float32')
          outs = exe.run(program=train_program,
                        feed={'X': x},
                         fetch_list=[loss.name])

    Args:
        learning_rate (float|Variable): the learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        clip (float): clipping threshold
        batch_size (float): batch size.
        sigma (float): for gaussian noise.

    Notes:
        Currently, DpsgdOptimizer doesn't support sparse parameter optimization.
    """

    def __init__(self,
                 learning_rate=0.001,
                 clip=0.9,
                 batch_size=0.999,
                 sigma=1e-8):
        # NOTE(review): these defaults differ from the defaults declared for
        # the dpsgd op's own attributes (clip 10.0, batch_size 16.0,
        # sigma 1.0) -- confirm which set is intended.
        for required in (learning_rate, clip, batch_size, sigma):
            assert required is not None
        super(DpsgdOptimizer, self).__init__(learning_rate=learning_rate)
        # Operator type appended for every parameter.
        self.type = "dpsgd"
        self._clip = clip
        self._batch_size = batch_size
        self._sigma = sigma

    def _append_optimize_op(self, block, param_and_grad):
        """Append the dpsgd op for one (parameter, gradient) pair to `block`.

        The parameter is both an input and the output of the op; the appended
        op is returned.
        """
        assert isinstance(block, framework.Block)

        param, grad = param_and_grad[0], param_and_grad[1]
        op_inputs = {
            "Param": param,
            "Grad": grad,
            "LearningRate": self._create_param_lr(param_and_grad)
        }
        op_outputs = {"ParamOut": param}
        op_attrs = {
            "clip": self._clip,
            "batch_size": self._batch_size,
            "sigma": self._sigma
        }

        # create the dpsgd optimize op
        return block.append_op(
            type=self.type,
            inputs=op_inputs,
            outputs=op_outputs,
            attrs=op_attrs,
            stop_gradient=True)
class
DecayedAdagradOptimizer
(
Optimizer
):
"""
**Decayed Adagrad Optimizer**
...
...
@@ -2258,6 +2337,7 @@ Momentum = MomentumOptimizer
Adagrad
=
AdagradOptimizer
Adam
=
AdamOptimizer
Adamax
=
AdamaxOptimizer
Dpsgd
=
DpsgdOptimizer
DecayedAdagrad
=
DecayedAdagradOptimizer
Adadelta
=
AdadeltaOptimizer
RMSProp
=
RMSPropOptimizer
...
...
python/paddle/fluid/tests/unittests/test_dpsgd_op.py
0 → 100644
浏览文件 @
766bd529
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class TestDpsgdOp(OpTest):
    def setUp(self):
        '''Test Dpsgd Operator with supplied attributes
        '''
        self.op_type = "dpsgd"

        shape = (102, 105)
        # Param is drawn before Grad.
        param = np.random.uniform(-1, 1, shape).astype("float32")
        grad = np.random.uniform(-1, 1, shape).astype("float32")

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'LearningRate': np.array([0.001]).astype("float32")
        }
        # clip is far above any norm the gradient can reach and sigma is 0,
        # so the expected result is the plain step computed by dpsgd_step.
        self.attrs = {'clip': 10000.0, 'batch_size': 16.0, 'sigma': 0.0}

        self.outputs = {'ParamOut': dpsgd_step(self.inputs, self.attrs)}

    def test_check_output(self):
        # Compare the operator output against self.outputs.
        self.check_output()
def dpsgd_step(inputs, attributes):
    '''
    Simulate one noise-free step of the dpsgd optimizer.

    The gradient is scaled down so that its L2 norm does not exceed
    ``attributes['clip']`` and is then applied as a plain SGD step. The
    random gaussian noise of the operator is not reproduced, so the result
    matches the operator only when ``sigma`` is 0.

    :param inputs: dict with 'Param', 'Grad' and 'LearningRate' arrays
    :param attributes: dict of attributes; only 'clip' is read
    :return: the updated parameter array
    '''
    param = inputs['Param']
    grad = inputs['Grad']
    lr = inputs['LearningRate']

    clip = attributes['clip']

    # Clip by the global L2 norm; gradients within the threshold are used
    # unchanged, so no extra rounding is introduced in that case.
    l2_norm = np.sqrt(np.sum(np.square(grad)))
    if l2_norm > clip:
        grad = grad / (l2_norm / clip)

    param_out = param - lr * grad
    return param_out
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_optimizer.py
浏览文件 @
766bd529
...
...
@@ -408,6 +408,47 @@ class TestAdamaxOptimizer(unittest.TestCase):
self
.
assertAlmostEqual
(
init_ops
[
0
].
attr
(
'value'
),
learning_rate
)
class TestDpsgdOptimizer(unittest.TestCase):
    def test_dpsgd_optimizer(self):
        """Minimizing a tiny mul -> mean program yields a scale and a dpsgd op."""

        def check_dpsgd_optimizer(optimizer_attr):
            # Build the program, run minimize on it and return the ops that
            # minimize reports.
            init_program = framework.Program()
            program = framework.Program()
            block = program.global_block()

            def float32_var(name, shape):
                return block.create_var(
                    dtype="float32", shape=shape, lod_level=0, name=name)

            mul_x = block.create_parameter(
                dtype="float32",
                shape=[5, 10],
                lod_level=0,
                name="mul.x",
                optimize_attr=optimizer_attr)
            mul_y = float32_var("mul.y", [10, 8])
            mul_out = float32_var("mul.out", [5, 8])
            block.append_op(
                type="mul",
                inputs={"X": mul_x,
                        "Y": mul_y},
                outputs={"Out": mul_out},
                attrs={"x_num_col_dims": 1})

            mean_out = float32_var("mean.out", [1])
            block.append_op(
                type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})

            dpsgd = optimizer.DpsgdOptimizer(
                learning_rate=0.01, clip=100.0, batch_size=16.0, sigma=0.0)
            appended_ops, _ = dpsgd.minimize(mean_out, init_program)
            return appended_ops

        param_attr = {
            'learning_rate': 1.1,
            'clip': 100.0,
            'batch_size': 16.0,
            'sigma': 4.0
        }
        opts = check_dpsgd_optimizer(param_attr)

        # NOTE(review): presumably the "scale" op comes from the
        # per-parameter learning_rate of 1.1 -- confirm against Optimizer.
        self.assertEqual(len(opts), 2)
        self.assertEqual([op.type for op in opts], ["scale", "dpsgd"])
class
TestDecayedAdagradOptimizer
(
unittest
.
TestCase
):
class
MockDecayedAdagrad
(
optimizer
.
DecayedAdagradOptimizer
):
def
get_accumulators
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录