Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
8f480ca9
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8f480ca9
编写于
10月 11, 2019
作者:
Y
Yibing Liu
提交者:
GitHub
10月 11, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix problems in en docs (#20451)
test=release/1.6, test=document_fix
上级
dc206128
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
169 addition
and
146 deletion
+169
-146
paddle/fluid/API.spec
paddle/fluid/API.spec
+13
-13
paddle/fluid/operators/crf_decoding_op.cc
paddle/fluid/operators/crf_decoding_op.cc
+23
-21
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+4
-4
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+91
-80
python/paddle/fluid/nets.py
python/paddle/fluid/nets.py
+9
-6
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+29
-22
未找到文件。
paddle/fluid/API.spec
浏览文件 @
8f480ca9
...
...
@@ -128,7 +128,7 @@ paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', '
paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3'))
paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e'))
paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b28bdb43160e9667be2a3457d19d9f5b'))
paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '
933b7e268c4ffa3d5c3ef953a5ee9f0b
'))
paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '
708ce0348b74d3e0c7885c2c524b7fa7
'))
paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '48ec1ba2d75c4e2faf8d9a47350462ae'))
paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', 'd1985a930a59c3bd41a7c1d72594f5b9'))
paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ae57e6e5136dade436f0df1f11770afa'))
...
...
@@ -176,7 +176,7 @@ paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_t
paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'eeb1591cfc854c6ffdac77b376313c44'))
paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa'))
paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da'))
paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '
83d4ca6dfb957912807f535756e7699
2'))
paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '
38297567127888e01542857839058d5
2'))
paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', 'd4435a63d34203339831ee6a86ef9242'))
paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', 'b83e7dfa81059b39bb137922dc914f50'))
paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', '1270395ce97a4e1b556104abbb14f096'))
...
...
@@ -188,7 +188,7 @@ paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters',
paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax', 'axis'], varargs=None, keywords=None, defaults=(False, -100, True, False, -1)), ('document', '54e1675aa0364f4a78fa72804ec0f413'))
paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'ecb75c1b00c4c76c98b482f633b7a10c'))
paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'cdf5dc2078f1e20dc61dd0bec7e28a29'))
paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '
98e7927f09ee2270535b29f048e481ec
'))
paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '
d016c137beb9a4528b7378b437d00151
'))
paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd7a6d59e464a7ef1184eb6caefeb49f1'))
paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbac07662a6e22e8e299ced880c7775'))
paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b9bd3129d36a70e7c4385df51ff71c62'))
...
...
@@ -206,7 +206,7 @@ paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len',
paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCHW')), ('document', '44da7890c8a362a83a1c0902a1dc1e4d'))
paddle.fluid.layers.resize_trilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCDHW')), ('document', '5b4d0f823f94c260fe5e6f7eec60a797'))
paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 'NCHW')), ('document', '0107a5cbae1aef3f381d3d769a6068eb'))
paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', '
f985c9b66e3aec96fa753a8eb44c991c
'))
paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', '
5c52e8512f97a84608bc3b8b3250fc70
'))
paddle.fluid.layers.gather_nd (ArgSpec(args=['input', 'index', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a7d625028525167b138106f574dffdf9'))
paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name', 'overwrite'], varargs=None, keywords=None, defaults=(None, True)), ('document', '69b22affd4a6326502af166f04c095ab'))
paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
...
...
@@ -220,7 +220,7 @@ paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None
paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '32196a194f757b4da114a595a5bc6414'))
paddle.fluid.layers.crop_tensor (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'd460aaf35afbbeb9beea4789aa6e4343'))
paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8eb36596bb43d7a907d3397c7aedbdb3'))
paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '
6fc86ed23b420c8a0f6c043563cf3937
'))
paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '
1a177f30e5013fae7ee6c45860cf4946
'))
paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '9af1926c06711eacef9e82d7a9e4d308'))
paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '538fc860b2a1734e118b94e4a1a3ee67'))
paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '00d437d1e0d9450ea75a0495b93b54a7'))
...
...
@@ -254,7 +254,7 @@ paddle.fluid.layers.elementwise_floordiv (ArgSpec(args=['x', 'y', 'axis', 'act',
paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', 'cfa120e583cd4a5bfa120c8a26f98a28'))
paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', 'ebbf399d4e03190ce5dc9488f05c92f4'))
paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', 'c39b647b6cf08e058d96ee503d5284fe'))
paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '
b24d0b21361c4bb8ef2cec8c26fb12b2
'))
paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '
2aed0f546f220364fb1da724a3176f74
'))
paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'f4b60847cb0f1ae00823ba6fb1b11310'))
paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '8c622791994a0d657d8c6c9cefa5bf34'))
paddle.fluid.layers.strided_slice (ArgSpec(args=['input', 'axes', 'starts', 'ends', 'strides'], varargs=None, keywords=None, defaults=None), ('document', '340d8d656272ea396b441aab848429a2'))
...
...
@@ -275,12 +275,12 @@ paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], var
paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '315b50c1cbd9569375b098c56f1e91c9'))
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5b32ed21ab89140a8e758002923a0da3'))
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', 'ecc4b1323028bde0518d666882d03515'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '
18ec2e3afeb90e70c8b73d2b71c40fdb
'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '
57256fcb1119dc35ae031889fa601d61
'))
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'a0b73c21be618cec0281e7903039e5e3'))
paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '90c74742f48c70b103f1fbb9eb129066'))
paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e
3993a477c94729526040ff65d95728e
'))
paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e
f1701e11d60508fe8f02dd2a8c60bdf
'))
paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937'))
paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '
45fc3652a8e1aeffbe4eba371c54f756
'))
paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '
6755168c4b2308e1e4f54cb56fa7dcb2
'))
paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2b0e5d5c155ce24bafc38b78cd0b164'))
paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2c568321feb4d16c41a83df43f95089d'))
paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', '5193cf1113f9d8d8f682ee5a5fc8b391'))
...
...
@@ -900,7 +900,7 @@ paddle.fluid.transpiler.DistributeTranspilerConfig ('paddle.fluid.transpiler.dis
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', '5e89c978199c4ecce2b26d5fed1ec52b'))
paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', 'b2d435f782ac8ea3ca480b8d24e7f5b4'))
paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '
b87bacfc70dd3477ed25ef14aa01389a
'))
paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '
3efe197c8e3e75f84a4c464d8b74e943
'))
paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', 'b1a07a0000eb9103e3a143ca8c13de5b'))
paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '6033b78da39b8b0ed302fbb0f67da502'))
paddle.fluid.optimizer.SGDOptimizer ('paddle.fluid.optimizer.SGDOptimizer', ('document', 'c3c8dd3193d991adf8bda505560371d6'))
...
...
@@ -1009,7 +1009,7 @@ paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'los
paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DGCMomentumOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
paddle.fluid.optimizer.LambOptimizer ('paddle.fluid.optimizer.LambOptimizer', ('document', '
7dd8b270156a52f1f6b4663336960893
'))
paddle.fluid.optimizer.LambOptimizer ('paddle.fluid.optimizer.LambOptimizer', ('document', '
56b5b21dc8fb01174c3bdd0b24f8be4b
'))
paddle.fluid.optimizer.LambOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'lamb_weight_decay', 'beta1', 'beta2', 'epsilon', 'regularization', 'exclude_from_weight_decay_fn', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.01, 0.9, 0.999, 1e-06, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LambOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
paddle.fluid.optimizer.LambOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
...
...
@@ -1017,9 +1017,9 @@ paddle.fluid.optimizer.LambOptimizer.backward (ArgSpec(args=['self', 'loss', 'st
paddle.fluid.optimizer.LambOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LambOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.LambOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
paddle.fluid.optimizer.ExponentialMovingAverage ('paddle.fluid.optimizer.ExponentialMovingAverage', ('document', '
a38b7d5b9f17a295ed15d4c1b9ab4cd0
'))
paddle.fluid.optimizer.ExponentialMovingAverage ('paddle.fluid.optimizer.ExponentialMovingAverage', ('document', '
c93a5e29890877d94c3180b3d5f2d464
'))
paddle.fluid.optimizer.ExponentialMovingAverage.__init__ (ArgSpec(args=['self', 'decay', 'thres_steps', 'name'], varargs=None, keywords=None, defaults=(0.999, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '
30f494752ac8921dc5835a63637f453a
'))
paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '
178fa374c299906eb88c0df8ed21a337
'))
paddle.fluid.optimizer.ExponentialMovingAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '8c8a1791608b02a1ede53d6dd3a4fcec'))
paddle.fluid.optimizer.ExponentialMovingAverage.update (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ea10f08af6d7aac3b7974aa976e4085f'))
paddle.fluid.optimizer.PipelineOptimizer ('paddle.fluid.optimizer.PipelineOptimizer', ('document', '6f85382abedb922387b08d98e8d0b69c'))
...
...
paddle/fluid/operators/crf_decoding_op.cc
浏览文件 @
8f480ca9
...
...
@@ -21,45 +21,47 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"Emission"
,
"(Tensor<float>/LoDTensor<float>). For a LoDTensor input, its "
"shape is [N x D] where N is the total sequence length of the "
"mini-batch and D is the total tag number. While for a tensor "
"input, its shape is [B X S X D] with B the batch size and S the "
"sequence length of each sample after padding. This input is the "
"unscaled emission weight matrix of the linear_chain_crf operator."
);
"(Tensor/LoDTensor). For a LoDTensor input, its shape is [N x D] "
"where N is the total sequence length of the mini-batch and D is "
"the total tag number. While for a tensor input, its shape is "
"[B X S X D] with B the batch size and S the sequence length of each "
"sample after padding. This input is the unscaled emission weight "
"matrix of the linear_chain_crf operator. The data type is float32 "
"or float64."
);
AddInput
(
"Transition"
,
"(Tensor
<float>
). A Tensor with shape [(D + 2) x D]. "
"(Tensor). A Tensor with shape [(D + 2) x D]. "
"This input is the transition weights learned by the linear_chain_crf "
"operator, denoted as w. The 1st row of w are transition weights for "
"the start mask. The 2nd row of w are transition weights for the end "
"mask. Transition weights between other tags begin from the 3rd row of "
"w. See more details in comments of the linear_chain_crf operator."
);
"w. See more details in comments of the linear_chain_crf operator. "
"The data type is the same as Input(Emission)."
);
AddInput
(
"Label"
,
"(Tensor
<int64_t>/LoDTensor<int64_t>
). The ground truth with shape "
"(Tensor
/LoDTensor
). The ground truth with shape "
"[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is "
"optional. See more details in the operator's comments."
)
"optional. See more details in the operator's comments. The data type "
"is int64."
)
.
AsDispensable
();
AddOutput
(
"ViterbiPath"
,
"(Tensor
<int64_t>/LoDTensor<int64_t>
). The decoding results. What to "
"(Tensor
/LoDTensor
). The decoding results. What to "
"return changes depending on whether the Input(Label) (the ground "
"truth) is given. See more details in the operator's comment."
);
"truth) is given. See more details in the operator's comment. "
"The data type is int64."
);
AddInput
(
"Length"
,
"(Tensor
<int64_t>
). The actual length of each sample before "
"(Tensor). The actual length of each sample before "
"padding with shape [B x 1]. It means the Input(Emission), "
"Input(Label) "
"and Output(ViterbiPath) are common tensors with padding when "
"this input "
"is given."
)
"Input(Label) and Output(ViterbiPath) are common tensors with "
"padding when this input is given. The data type is int64."
)
.
AsDispensable
();
AddComment
(
R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
feature weights learned by the linear_chain_crf operator
. It implements the
Viterbi algorithm which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
feature weights learned by the linear_chain_crf operator
and performs decoding.
It implements the Viterbi algorithm which is a dynamic programming algorithm
for finding the most likely sequence of hidden states, called the Viterbi path,
that results in a
sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given:
...
...
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
浏览文件 @
8f480ca9
...
...
@@ -46,7 +46,7 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
AddAttr
<
int
>
(
"seed"
,
"(int, default 0) "
"Random seed of generator."
"0 means
use system wide
seed."
"0 means
don't specify random
seed."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time."
)
.
SetDefault
(
0
);
...
...
@@ -58,9 +58,9 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
AddComment
(
R"DOC(
Used to initialize tensors with gaussian random generator.
The default mean of the distribution is 0
.
and default standard
deviation (std) of the distribution is 1.. Uers can set mean and std
by
input arguments.
The default mean of the distribution is 0
,
and default standard
deviation (std) of the distribution is 1.
0
. Users can set mean and std
via
input arguments.
)DOC"
);
}
};
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
8f480ca9
...
...
@@ -1610,11 +1610,13 @@ def crf_decoding(input, param_attr, label=None, length=None):
Args:
input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training.
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
label(${label_type}): ${label_comment}
label(${label_type}
, optional
): ${label_comment}
l
abel(${length_type}
): ${length_comment}
l
ength(${length_type}, optional
): ${length_comment}
Returns:
Variable: ${viterbi_path_comment}
...
...
@@ -1626,8 +1628,8 @@ def crf_decoding(input, param_attr, label=None, length=None):
# LoDTensor-based example
num_labels = 10
feature = fluid.
layers.data(name='word_emb', shape=[
784], dtype='float32', lod_level=1)
label = fluid.
layers.data(name='label', shape=[
1], dtype='int64', lod_level=1)
feature = fluid.
data(name='word_emb', shape=[-1,
784], dtype='float32', lod_level=1)
label = fluid.
data(name='label', shape=[-1,
1], dtype='int64', lod_level=1)
emission = fluid.layers.fc(input=feature, size=num_labels)
crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label,
...
...
@@ -1637,9 +1639,9 @@ def crf_decoding(input, param_attr, label=None, length=None):
# Common tensor example
num_labels, max_len = 10, 20
feature = fluid.
layers.data(name='word_emb_pad', shape=[
max_len, 784], dtype='float32')
label = fluid.
layers.data(name='label_pad', shape=[
max_len, 1], dtype='int64')
length = fluid.
layers.data(name='length', shape=[
1], dtype='int64')
feature = fluid.
data(name='word_emb_pad', shape=[-1,
max_len, 784], dtype='float32')
label = fluid.
data(name='label_pad', shape=[-1,
max_len, 1], dtype='int64')
length = fluid.
data(name='length', shape=[-1,
1], dtype='int64')
emission = fluid.layers.fc(input=feature, size=num_labels,
num_flatten_dims=2)
...
...
@@ -7078,33 +7080,33 @@ def nce(input,
${comment}
Args:
input (Variable): input variable.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
input (Variable): Input variable, 2-D tensor with shape [batch_size, dim],
and data type is float32 or float64.
label (Variable): Input label, 2-D tensor with shape [batch_size, num_true_class],
and data type is int64.
num_total_classes (int):${num_total_classes_comment}.
sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of nce. If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
num_neg_samples (int): ${num_neg_samples_comment}
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
sampler (str): The sampler used to sample class from negtive classes.
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr|None): To specify the bias parameter attribute.
Default: None, which means the default bias parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
num_neg_samples (int): ${num_neg_samples_comment}.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
sampler (str, optional): The sampler used to sample class from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'.
custom_dist (
float[]): A float[]
with size=num_total_classes.
custom_dist (
nd.array|None): A numpy ndarray
with size=num_total_classes.
It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probability of i-th class to be sampled.
default: None.
seed (int): The seed used in sampler. default: 0.
is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows.
seed (int, optional): The seed used in sampler. Default 0, means no random seed.
is_sparse(bool, optional): The flag indicating whether to use sparse update,
the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default False.
Returns:
Variable: The output nce loss.
...
...
@@ -7119,8 +7121,8 @@ def nce(input,
window_size = 5
words = []
for i in xrange(window_size):
words.append(fluid.
layers.
data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
words.append(fluid.data(
name='word_{0}'.format(i), shape=[
-1,
1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
...
...
@@ -8182,17 +8184,17 @@ def one_hot(input, depth, allow_out_of_range=False):
def autoincreased_step_counter(counter_name=None, begin=1, step=1):
"""
Create an auto-increase variable
which will be automatically increased by 1 every mini-batch
Return the run counter of the main program, default is started from
1.
Create an auto-increase variable
, which will be automatically increased
by 1 in every iteration. By default, the first return of this counter is 1,
and the step size is
1.
Args:
counter_name(str
): The counter name, default is
'@STEP_COUNTER@'.
begin(int
): The first value of this counter
.
step(int
): The increment step between each execution
.
counter_name(str
, optional): The counter name. Default
'@STEP_COUNTER@'.
begin(int
, optional): The first return value of this counter. Default 1
.
step(int
, optional): The step size. Default 1
.
Returns:
Variable: The
global run counter
.
Variable: The
auto-increased Variable with data type int64
.
Examples:
.. code-block:: python
...
...
@@ -9976,9 +9978,11 @@ def gather(input, index, overwrite=True):
[5, 6]]
Args:
input (Variable): The source input with rank>=1.
index (Variable): The index input with rank=1.
overwrite (bool): The mode that updating the grad when has same index.
input (Variable): The source input tensor with rank>=1. Supported data type is
int32, int64, float32, float64 and uint8 (only for CPU),
float16 (only for GPU).
index (Variable): The index input tensor with rank=1. Data type is int32 or int64.
overwrite (bool, optional): The mode used to update the grad when the same index appears more than once.
If True, use the overwrite mode to update the grad of the same index,
if False, use the accumulate mode to update the grad of the same index.
Default value is True.
...
...
@@ -9993,8 +9997,8 @@ def gather(input, index, overwrite=True):
.. code-block:: python
import paddle.fluid as fluid
x = fluid.
layers.
data(name='x', shape=[-1, 5], dtype='float32')
index = fluid.
layers.
data(name='index', shape=[-1, 1], dtype='int32')
x = fluid.data(name='x', shape=[-1, 5], dtype='float32')
index = fluid.data(name='index', shape=[-1, 1], dtype='int32')
output = fluid.layers.gather(x, index)
"""
helper = LayerHelper('gather', **locals())
...
...
@@ -10996,11 +11000,12 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
Args:
label (Variable): Indicates whether the left is ranked higher than the right or not.
left (Variable): Ranking score for left.
right (Variable): Ranking score for right.
Data type is float32.
left (Variable): Ranking score for left. Data type float32.
right (Variable): Ranking score for right. Data type float32.
margin (float): Indicates the given margin.
name
(str|None): A name for this layer (optional). If set None, the layer
will be named automatically
.
name
(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set name; it is None by default
.
Returns:
Variable: The ranking loss.
...
...
@@ -11013,9 +11018,9 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
.. code-block:: python
import paddle.fluid as fluid
label = fluid.
layers.
data(name="label", shape=[-1, 1], dtype="float32")
left = fluid.
layers.
data(name="left", shape=[-1, 1], dtype="float32")
right = fluid.
layers.
data(name="right", shape=[-1, 1], dtype="float32")
label = fluid.data(name="label", shape=[-1, 1], dtype="float32")
left = fluid.data(name="left", shape=[-1, 1], dtype="float32")
right = fluid.data(name="right", shape=[-1, 1], dtype="float32")
out = fluid.layers.margin_rank_loss(label, left, right)
"""
helper = LayerHelper('margin_rank_loss', **locals())
...
...
@@ -12186,12 +12191,12 @@ def gaussian_random_batch_size_like(input,
Args:
input (Variable): ${input_comment}
shape (tuple|list): ${shape_comment}
input_dim_idx (
I
nt): ${input_dim_idx_comment}
output_dim_idx (
I
nt): ${output_dim_idx_comment}
mean (
F
loat): ${mean_comment}
std (
F
loat): ${std_comment}
seed (
I
nt): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data
: float32, float_16, int etc
input_dim_idx (
i
nt): ${input_dim_idx_comment}
output_dim_idx (
i
nt): ${output_dim_idx_comment}
mean (
f
loat): ${mean_comment}
std (
f
loat): ${std_comment}
seed (
i
nt): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data
, float32 or float64.
Returns:
out (Variable): ${out_comment}
...
...
@@ -12200,7 +12205,7 @@ def gaussian_random_batch_size_like(input,
.. code-block:: python
import paddle.fluid as fluid
input = fluid.
layers.
data(name="input", shape=[13, 11], dtype='float32')
input = fluid.data(name="input", shape=[13, 11], dtype='float32')
out = fluid.layers.gaussian_random_batch_size_like(
input, shape=[-1, 11], mean=1.0, std=2.0)
...
...
@@ -14000,7 +14005,8 @@ def similarity_focus(input, axis, indexes, name=None):
Args:
input(Variable): The input tensor variable(default float). It should
be a 4-D tensor with shape [BatchSize, A, B, C].
be a 4-D tensor with shape [BatchSize, A, B, C]. Data type is
float32 or float64.
axis(int): Indicating the dimension to be selected. It can only be
1, 2 or 3.
indexes(list): Indicating the indexes of the selected dimension.
...
...
@@ -14013,7 +14019,7 @@ def similarity_focus(input, axis, indexes, name=None):
.. code-block:: python
import paddle.fluid as fluid
data = fluid.
layers.
data(
data = fluid.data(
name='data', shape=[-1, 3, 2, 2], dtype='float32')
fluid.layers.similarity_focus(input=data, axis=1, indexes=[0])
"""
...
...
@@ -14226,13 +14232,15 @@ def log_loss(input, label, epsilon=1e-4, name=None):
- (1 - label) * \\log{(1 - input + \\epsilon)}
Args:
input (Variable|list):
a
2-D tensor with shape [N x 1], where N is the
input (Variable|list):
A
2-D tensor with shape [N x 1], where N is the
batch size. This input is a probability computed
by the previous operator.
label (Variable|list): the ground truth which is a 2-D tensor with
shape [N x 1], where N is the batch size.
epsilon (float): epsilon
name (string): the name of log_loss
by the previous operator. Data type float32.
label (Variable|list): The ground truth which is a 2-D tensor with
shape [N x 1], where N is the batch size.
Data type float32.
epsilon (float, optional): A small number for numerical stability. Default 1e-4.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
Returns:
Variable: A 2-D tensor with shape [N x 1], the negative log loss.
...
...
@@ -14241,8 +14249,8 @@ def log_loss(input, label, epsilon=1e-4, name=None):
.. code-block:: python
import paddle.fluid as fluid
label = fluid.
layers.data(name='label', shape=[
1], dtype='int64')
prob = fluid.
layers.data(name='prob', shape=[
10], dtype='float32')
label = fluid.
data(name='label', shape=[-1,
1], dtype='int64')
prob = fluid.
data(name='prob', shape=[-1,
10], dtype='float32')
cost = fluid.layers.log_loss(input=prob, label=label)
"""
helper = LayerHelper('log_loss', **locals())
...
...
@@ -14380,7 +14388,7 @@ def bilinear_tensor_product(x,
param_attr=None,
bias_attr=None):
"""
**
Add
Bilinear Tensor Product Layer**
**Bilinear Tensor Product Layer**
This layer performs bilinear tensor product on two inputs.
For example:
...
...
@@ -14391,31 +14399,34 @@ def bilinear_tensor_product(x,
In this formula:
- :math:`x`: the first input contains M elements, shape is [batch_size, M].
- :math:`y`: the second input contains N elements, shape is [batch_size, N].
- :math:`W_{i}`: the i-th learned weight, shape is [M, N]
- :math:`W_{i}`: the i-th learned weight, shape is [M, N]
.
- :math:`out_{i}`: the i-th element of out, shape is [batch_size, size].
- :math:`y^\mathrm{T}`: the transpose of :math:`y`.
Args:
x (Variable): 2-D input tensor with shape [batch_size, M]
y (Variable): 2-D input tensor with shape [batch_size, N]
x (Variable): 2-D input tensor with shape [batch_size, M]. Data type
is float32 or float64.
y (Variable): 2-D input tensor with shape [batch_size, N]. Data type
should be same as **x**.
size (int): The dimension of this layer.
act (str, default None): Activation to be applied to the output of this layer.
name (str, default None): The name of this layer.
param_attr (ParamAttr, default None): The parameter attribute for the learnable w.
parameters/weights of this layer.
bias_attr (ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str|None): Activation to be applied to the output of this layer. Default None.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr|None): To specify the bias parameter attribute.
Default: None, which means the default bias parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
Returns:
Variable: A 2-D Tensor of shape [batch_size, size].
Variable: A 2-D Tensor of shape [batch_size, size].
Data type is the same as input **x**.
Examples:
.. code-block:: python
import paddle.fluid as fluid
layer1 = fluid.
layers.
data("t1", shape=[-1, 5], dtype="float32")
layer2 = fluid.
layers.
data("t2", shape=[-1, 4], dtype="float32")
layer1 = fluid.data("t1", shape=[-1, 5], dtype="float32")
layer2 = fluid.data("t2", shape=[-1, 4], dtype="float32")
tensor = fluid.layers.bilinear_tensor_product(x=layer1, y=layer2, size=1000)
"""
helper = LayerHelper('bilinear_tensor_product', **locals())
...
...
python/paddle/fluid/nets.py
浏览文件 @
8f480ca9
...
...
@@ -318,8 +318,9 @@ def sequence_conv_pool(input,
def
glu
(
input
,
dim
=-
1
):
"""
The Gated Linear Units(GLU) composed by split, sigmoid activation and element-wise
multiplication. Specifically, Split the input into two equal sized parts,
The Gated Linear Units(GLU) composed by :ref:`api_fluid_layers_split` ,
:ref:`api_fluid_layers_sigmoid` and :ref:`api_fluid_layers_elementwise_mul` .
Specifically, GLU will split the input into two equal-sized parts,
:math:`a` and :math:`b`, along the given dimension and then compute as
following:
...
...
@@ -331,18 +332,20 @@ def glu(input, dim=-1):
<https://arxiv.org/pdf/1612.08083.pdf>`_.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int): The dimension along which to split. If :math:`dim < 0`, the
input (Variable): The input variable which is a Tensor or LoDTensor.
The supported data types include float32, float64
and float16 (only for GPU).
dim (int, optional): The dimension along which to split. If :math:`dim < 0`, the
dimension to split along is :math:`rank(input) + dim`. Default -1.
Returns:
Variable: Variable with half the size of input.
Variable: Variable with half the size
and same data type
of input.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = fluid.
layers.
data(
data = fluid.data(
name="words", shape=[-1, 6, 3, 9], dtype="float32")
# shape of output: [-1, 3, 3, 9]
output = fluid.nets.glu(input=data, dim=1)
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
8f480ca9
...
...
@@ -2254,11 +2254,11 @@ class LambOptimizer(AdamOptimizer):
.. math::
m_t &=
\\
beta_1 m_{t - 1}+ (1 -
\\
beta_1)g_t
\\
m_t &=
\\
beta_1 m_{t - 1}+ (1 -
\\
beta_1)g_t
v_t &=
\\
beta_2 v_{t - 1} + (1 -
\\
beta_2)g_t^2
\\
v_t &=
\\
beta_2 v_{t - 1} + (1 -
\\
beta_2)g_t^2
r_t &=
\\
frac{m_t}{
\\
sqrt{v_t}+
\\
epsilon}
\\
r_t &=
\\
frac{m_t}{
\\
sqrt{v_t}+
\\
epsilon}
w_t &= w_{t-1} -
\\
eta_t
\\
frac{
\\
left \| w_{t-1}
\\
right \|}{
\\
left \| r_t +
\\
lambda w_{t-1}
\\
right \|} (r_t +
\\
lambda w_{t-1})
...
...
@@ -2267,25 +2267,28 @@ class LambOptimizer(AdamOptimizer):
learning rate, :math:`
\\
lambda` the LAMB weight decay rate.
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one
\
float value as data element.
lamb_weight_decay (float): The LAMB weight decay rate.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): A small float value for numerical stability.
regularization (Regularizer): A Regularizer, such as
fluid.regularizer.L1DecayRegularizer.
exclude_from_weight_decay_fn (function): Exclude a parameter from weight
decay when **exclude_from_weight_decay_fn(parameter)** returns true.
name (str|None): An optional name prefix.
learning_rate (float|Variable, optional): the learning rate used to update parameters.
\
Can be a float value or a Variable with data type float32. Default 0.001.
lamb_weight_decay (float, optional): The LAMB weight decay rate. Default 0.01.
beta1 (float, optional): The exponential decay rate for the 1st moment estimates.
Default 0.9.
beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
Default 0.999.
epsilon (float, optional): A small float value for numerical stability. Default 1e-6.
regularization (Regularizer|None): A Regularizer, such as
fluid.regularizer.L1DecayRegularizer. Default None.
exclude_from_weight_decay_fn (function|None): Exclude a parameter from weight
decay when **exclude_from_weight_decay_fn(parameter)** returns true.
Default None.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = fluid.
layers.data(name='x', shape=[
5], dtype='float32')
data = fluid.
data(name='x', shape=[-1,
5], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
cost = fluid.layers.mean(hidden)
...
...
@@ -2738,10 +2741,13 @@ class ExponentialMovingAverage(object):
Args:
decay (float): The exponential decay rate, usually close to 1, such as
0.999, 0.9999, ... .
thres_steps (Variable|None): If not `None`, schedule the decay rate.
name (str|None): An optional name prefix.
decay (float, optional): The exponential decay rate, usually close to 1, such as
0.999, 0.9999, ... . Default 0.999.
thres_steps (Variable|None): If not `None`, schedule the decay rate.
Default None.
name (str|None): For detailed information, please refer to
:ref:`api_guide_Name`. Usually there is no need to set name; it is None by
default.
Examples:
...
...
@@ -2752,7 +2758,7 @@ class ExponentialMovingAverage(object):
import paddle
import paddle.fluid as fluid
data = fluid.
layers.data(name='x', shape=[
5], dtype='float32')
data = fluid.
data(name='x', shape=[-1,
5], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
cost = fluid.layers.mean(hidden)
...
...
@@ -2910,7 +2916,8 @@ class ExponentialMovingAverage(object):
Args:
executor (Executor): The Executor to execute applying.
need_restore (bool): Whether to restore parameters after applying.
need_restore (bool, optional): Whether to restore parameters after
applying. Default True.
"""
executor
.
run
(
self
.
apply_program
)
try
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录