Commit baf02328 authored by Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-async-ssa-graph-executor-communicator
test=develop
@@ -34,7 +34,7 @@ ExternalProject_Add(
     BUILD_IN_SOURCE  1
 )
-ADD_LIBRARY(dgc SHARED IMPORTED GLOBAL)
+ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES})
 ADD_DEPENDENCIES(dgc extern_dgc)
......
@@ -95,7 +95,7 @@ paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size',
 paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b'))
 paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8'))
 paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4'))
-paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b'))
+paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', '59b1c6bf2f0fa9dc649c85fef3a3b2ea'))
 paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa'))
 paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625'))
 paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95'))
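
The only functional change in this hunk is that softmax gains an axis argument with default -1. A minimal usage sketch based solely on the ArgSpec above; the tensor shape and variable names are illustrative:

    import paddle.fluid as fluid

    # Apply softmax over the last axis of a [batch, 3, 10] tensor;
    # axis=-1 is the new default recorded in API.spec.
    x = fluid.layers.data(name='x', shape=[3, 10], dtype='float32')
    probs = fluid.layers.softmax(x, axis=-1)
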
@@ -134,7 +134,7 @@ paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits',
 paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d'))
 paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996'))
 paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e'))
-paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee'))
+paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '2c4d1ae83da6ed35e3b36ba1b3b51d23'))
 paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
 paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
 paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '3f536aafba30d793287b52d231baff1b'))
@@ -225,10 +225,12 @@ paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=Non
 paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6'))
 paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932'))
 paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949'))
+paddle.fluid.layers.temporal_shift (ArgSpec(args=['x', 'seg_num', 'shift_ratio', 'name'], varargs=None, keywords=None, defaults=(0.25, None)), ('document', 'fe4481fb31363b09cfdd228fc6776ddf'))
 paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb'))
 paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d'))
 paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99'))
 paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7'))
+paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec'))
 paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
 paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329'))
 paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
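
Two layers are newly registered in this hunk: temporal_shift and kldiv_loss. A hedged sketch of calling them with the signatures recorded above; shapes and variable names are made up for illustration:

    import paddle.fluid as fluid

    # kldiv_loss: x is usually log-probabilities and target probabilities;
    # reduction='mean' is the default listed in the ArgSpec.
    x = fluid.layers.data(name='x', shape=[100], dtype='float32')
    target = fluid.layers.data(name='target', shape=[100], dtype='float32')
    loss = fluid.layers.kldiv_loss(x=x, target=target, reduction='mean')

    # temporal_shift: shifts a fraction of channels along the segment axis;
    # the leading dimension is expected to be batch * seg_num.
    frames = fluid.layers.data(name='frames', shape=[4, 32, 32], dtype='float32')
    shifted = fluid.layers.temporal_shift(frames, seg_num=2, shift_ratio=0.25)
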
@@ -276,7 +278,7 @@ paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None,
 paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
 paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
 paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
-paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823'))
+paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', 'dd68bead34dfbaf6b0a163fc1cc3c385'))
 paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
 paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -357,6 +359,7 @@ paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], vara
 paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
 paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
 paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
+paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565'))
 paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99'))
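
linear_lr_warmup is added next to the existing decay helpers. A sketch of prepending a warmup phase to a decayed rate, assuming the helper accepts the decayed-rate Variable as its learning_rate argument (all numbers are examples):

    import paddle.fluid as fluid

    # Cosine decay over 120 epochs of 1000 steps, preceded by a linear
    # warmup from 0.0 to 0.1 during the first 500 steps.
    decayed = fluid.layers.cosine_decay(learning_rate=0.1,
                                        step_each_epoch=1000, epochs=120)
    lr = fluid.layers.linear_lr_warmup(learning_rate=decayed, warmup_steps=500,
                                       start_lr=0.0, end_lr=0.1)
    sgd = fluid.optimizer.SGD(learning_rate=lr)
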
@@ -406,6 +409,7 @@ paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', '
 paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665'))
 paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
 paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
+paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4'))
 paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
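
extend_with_decoupled_weight_decay takes an optimizer class and returns a subclass with decoupled (AdamW-style) weight decay. A rough sketch; only the factory signature comes from the spec above, and the coeff keyword of the generated class is an assumption:

    import paddle.fluid as fluid

    # Derive an AdamW-like optimizer class from the stock Adam optimizer.
    AdamW = fluid.contrib.extend_with_decoupled_weight_decay(fluid.optimizer.Adam)

    # 'coeff' (the decay coefficient) is assumed here; the remaining keyword
    # arguments are forwarded to the wrapped Adam constructor.
    optimizer = AdamW(learning_rate=0.001, coeff=0.01)
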
@@ -428,63 +432,75 @@ paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys',
 paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970'))
 paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.SGDOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.SGDOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.MomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.MomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.AdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdamOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.AdamOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdamaxOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.AdamaxOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.FtrlOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.FtrlOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.RMSPropOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.AdadeltaOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715'))
 paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.ModelAverage.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.ModelAverage.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe'))
 paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.LarsMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.LarsMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'rampup_begin_step', 'rampup_step', 'sparsity', 'use_nesterov', 'local_grad_clip_norm', 'num_trainers', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1, [0.999], False, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.DGCMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
......
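
Every optimizer class in the hunk above also gains an apply_optimize(loss, startup_program, params_grads) method. A sketch of the split flow it enables, roughly equivalent to calling minimize() directly; the toy regression program below is illustrative:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    opt = fluid.optimizer.SGD(learning_rate=0.01)
    # Split minimize() into its two halves: build the gradients, then apply them.
    params_grads = opt.backward(loss)
    opt.apply_optimize(loss, startup_program=None, params_grads=params_grads)
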
@@ -195,8 +195,7 @@ cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
         proto_desc)
-cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper)
+cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS inplace_op_pass op_registry proto_desc op_info memory_optimize_helper pass_builder)
 cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
 cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
......
@@ -25,8 +25,12 @@ if(WITH_DISTRIBUTE)
 endif()
 if(WITH_GPU)
+  set(dgc_deps "")
+  if(NOT WIN32)
+    set(dgc_deps dgc)
+  endif()
   nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
-      dynload_cuda variable_visitor dgc)
+      dynload_cuda variable_visitor ${dgc_deps})
   nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
       dynload_cuda variable_visitor)
   if(WITH_DISTRIBUTE)
......
@@ -68,7 +68,7 @@ void AllReduceDepsPass::ApplyImpl(ir::Graph* graph) const {
       for (auto& o_it : outputs) {
         for (auto& v : o_it.second) {  // values
           vars[v] = order;
-          VLOG(1) << "in all_reduce_deps_pass:" << v;
+          VLOG(10) << "in all_reduce_deps_pass:" << v;
         }
       }
       order++;
......
@@ -156,7 +156,6 @@ void InplacePass::ApplyImpl(ir::Graph* graph) const {
       continue;
     TryInplaceOpInputOutput(op, graph);
   }
-  // graph->ResolveHazard(var_nodes_);
 }
 
 void InplacePass::InplaceModifyDesc(const std::string& var,
@@ -168,7 +167,7 @@ void InplacePass::InplaceModifyDesc(const std::string& var,
     auto* op_desc = op->Op();
     op_desc->RenameInput(var, cache_var);
     op_desc->RenameOutput(var, cache_var);
-    if (op_desc->Block()->HasVar(var)) op_desc->Block()->RemoveVar(var);
     op_desc->Flush();
   }
 }
@@ -265,8 +264,6 @@ void InplacePass::WithdrawModify(const NodeSwapQueue& nodes,
 
 void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
                                           ir::Graph* graph) const {
   VLOG(4) << "Try to inplace op " << op->Name();
-  // PADDLE_ENFORCE(op->Op() != nullptr && op->Op()->Block() != nullptr,
-  //                "op_desc is nullptr");
   // some pre-requirments need to meet if the op want to inplaced.
   PADDLE_ENFORCE(op->Op() != nullptr, "op_desc is nullptr");
...@@ -446,19 +443,20 @@ bool GraphView::CheckDeps(ir::Node* var, ir::Node* current_op) const { ...@@ -446,19 +443,20 @@ bool GraphView::CheckDeps(ir::Node* var, ir::Node* current_op) const {
// check if op2 depends on op1's output // check if op2 depends on op1's output
bool GraphView::CheckOpDeps(ir::Node* op1, ir::Node* op2) const { bool GraphView::CheckOpDeps(ir::Node* op1, ir::Node* op2) const {
auto print_op = [&](ir::Node* op, const char* name) { if (VLOG_IS_ON(4)) {
std::ostringstream os; auto print_op = [&](ir::Node* op, const char* name) {
os << " " << name << " : " << op->Name() << " "; std::ostringstream os;
os << "Input args : "; os << " " << name << " : " << op->Name() << " ";
for (auto& arg : op->inputs) os << arg->Name() << " "; os << "Input args : ";
os << "Output args : "; for (auto& arg : op->inputs) os << arg->Name() << " ";
for (auto& arg : op->outputs) os << arg->Name() << " "; os << "Output args : ";
os << "Level : " << op_level_.at(op); for (auto& arg : op->outputs) os << arg->Name() << " ";
VLOG(4) << os.str(); os << "Level : " << op_level_.at(op);
}; VLOG(4) << os.str();
print_op(op1, "OP1"); };
print_op(op2, "OP2"); print_op(op1, "OP1");
print_op(op2, "OP2");
}
if (op1 == op2) return true; if (op1 == op2) return true;
if (op_level_.at(op1) >= op_level_.at(op2)) return false; if (op_level_.at(op1) >= op_level_.at(op2)) return false;
......
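The logging changes in the two passes above share one goal: keep routine runs quiet. all_reduce_deps_pass moves its per-variable dump from VLOG(1) to VLOG(10), and CheckOpDeps now builds its per-op debug strings only inside a VLOG_IS_ON(4) guard, so the formatting cost is paid only when that verbosity is actually enabled. A minimal standalone sketch of the same gating pattern with the glog-style macros Paddle uses (the function and its arguments are illustrative, not from the patch):

#include <sstream>
#include <string>
#include <vector>
#include "glog/logging.h"

// Build the (comparatively expensive) debug string only when verbose
// logging at level 4 or higher is enabled, e.g. via --v=4.
void DumpOpIfVerbose(const std::string& op_name,
                     const std::vector<std::string>& args) {
  if (VLOG_IS_ON(4)) {
    std::ostringstream os;
    os << "op " << op_name << " args:";
    for (const auto& a : args) os << " " << a;
    VLOG(4) << os.str();
  }
}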
...@@ -142,16 +142,15 @@ TEST(OrderedSet, FindBestFitNode) { ...@@ -142,16 +142,15 @@ TEST(OrderedSet, FindBestFitNode) {
for (auto& node : nodes) { for (auto& node : nodes) {
pool.Insert(node.get()); pool.Insert(node.get());
} }
// FIXME(liuwei1031) this API has changed,
// disable these tests temporarily auto* n = nodes[0].get();
// FindNextBestFitNode auto* cache = pool.FindBestFitNode(n);
// auto* n = nodes[0].get(); ASSERT_TRUE(cache->Name() == "a" || cache->Name() == "c");
// auto* cache = pool.FindBestFitNode(n); auto* cache_b = pool.FindNextBestFitNode(n, cache);
// PADDLE_ENFORCE(cache->Name() == "a"); ASSERT_TRUE(cache_b->Name() != cache->Name());
// cache = pool.FindNextBestFitNode(n, cache); ASSERT_TRUE(cache_b->Name() == "a" || cache_b->Name() == "c");
// PADDLE_ENFORCE(cache->Name() == "c"); cache = pool.FindNextBestFitNode(n, cache_b);
// cache = pool.FindNextBestFitNode(n, cache); ASSERT_TRUE(cache == nullptr);
// PADDLE_ENFORCE(cache->Name() == "b");
} }
} // namespace details } // namespace details
......
...@@ -12,9 +12,14 @@ ...@@ -12,9 +12,14 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <iostream>
#include <iterator> #include <iterator>
#include <memory>
#include <string> #include <string>
#include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/framework/details/inplace_op_pass.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
...@@ -165,118 +170,147 @@ REGISTER_OPERATOR(multi_out_grad, f::NOP, f::MultiOutGradInplaceInToOut, ...@@ -165,118 +170,147 @@ REGISTER_OPERATOR(multi_out_grad, f::NOP, f::MultiOutGradInplaceInToOut,
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// TEST(InferInplace, SingleOpInplaceInToOut) { void FakeSuccData(ProgramDesc* prog) { // NOLINT
// ProgramDesc prog; prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
// auto* op = prog.MutableBlock(0)->AppendOp(); prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128});
// op->SetType("single_op"); prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
// op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
// op->SetOutput("Out", {"test2_out"}); prog->MutableBlock(0)->Var("test2_out");
// prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 32, 128, 128});
// prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); }
// prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128});
// prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); void FakeNoInplaceData(ProgramDesc* prog) { // NOLINT
// prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
// prog.MutableBlock(0)->Var("test2_out"); prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128});
// prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128}); prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
// prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; prog->MutableBlock(0)->Var("test2_out");
// auto in_to_outs = infer_inplace(*op); prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 31, 128, 128});
// EXPECT_EQ(in_to_outs.size(), 1ul); }
// auto it = in_to_outs.begin();
// EXPECT_EQ(it->first, "test2_a"); ir::Node* GetNodeFromGraph(ir::Graph* g, std::string name) {
// EXPECT_EQ(it->second, "test2_out"); ir::Node* op_node = nullptr;
// } for (auto& item : g->Nodes()) {
// if (item->Name() == name) {
// TEST(InferInplace, SingleGradOpInplaceInToOut) { op_node = item;
// ProgramDesc prog; break;
// auto* op = prog.MutableBlock(0)->AppendOp(); }
// op->SetType("single_op_grad"); }
// op->SetInput(GradVarName("Out"), {"test2_out"}); return op_node;
// op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"}); }
//
// prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); std::unique_ptr<ir::Graph> test_SingleOpInplaceInToOut(
// prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024}); std::unique_ptr<ir::Graph> g) {
// prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); std::unique_ptr<details::InplacePass> pass(new details::InplacePass());
// prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); ir::Node* op_node = GetNodeFromGraph(g.get(), "single_op");
// prog.MutableBlock(0)->Var("test2_out"); EXPECT_NE(op_node, nullptr);
// prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024}); pass->Apply(g.get());
// return g;
// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; }
// auto in_to_outs = infer_inplace(*op);
// EXPECT_EQ(in_to_outs.size(), 1ul); TEST(InferInplace, SingleOpInplaceInToOut) {
// auto it = in_to_outs.begin(); ProgramDesc prog;
// EXPECT_EQ(it->first, "test2_out"); auto* op = prog.MutableBlock(0)->AppendOp();
// EXPECT_EQ(it->second, "test2_a"); op->SetType("single_op");
// } op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
// op->SetOutput("Out", {"test2_out"});
// TEST(InferInplace, MultiOutInplaceInToOut) {
// ProgramDesc prog; FakeSuccData(&prog);
// auto* op = prog.MutableBlock(0)->AppendOp(); std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
// op->SetType("multi_out_op"); g = test_SingleOpInplaceInToOut(std::move(g));
// op->SetInput("X", {"a0", "a1"}); auto op_node = GetNodeFromGraph(g.get(), "single_op");
// op->SetInput("Y", {"b0"});
// op->SetInput("Z", {"c0", "c1"}); EXPECT_EQ(op_node->outputs[0]->Name(), "test2_a");
// op->SetOutput("Out", {"o0"}); }
// op->SetOutput("YOut", {"y0"});
// op->SetOutput("ZOut", {"z0"}); TEST(InferInplace, SingleOpInplaceInToOutNoInplace) {
// ProgramDesc prog;
// prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); auto* op = prog.MutableBlock(0)->AppendOp();
// prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); op->SetType("single_op");
// prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
// prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); op->SetOutput("Out", {"test2_out"});
// prog.MutableBlock(0)->Var("o0");
// prog.MutableBlock(0)->Var("y0"); FakeNoInplaceData(&prog);
// prog.MutableBlock(0)->Var("z0"); std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
// prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); g = test_SingleOpInplaceInToOut(std::move(g));
// prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); auto op_node = GetNodeFromGraph(g.get(), "single_op");
// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); EXPECT_EQ(op_node->outputs[0]->Name(), "test2_out");
// prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); }
// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024});
// TEST(InferInplace, MultiOutInplaceInToOut) {
// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; ProgramDesc prog;
// auto in_to_outs = infer_inplace(*op); auto* op = prog.MutableBlock(0)->AppendOp();
// EXPECT_EQ(in_to_outs.size(), 3ul); op->SetType("multi_out_op");
// std::unordered_map<std::string, std::string> expects = { op->SetInput("X", {"a0", "a1"});
// {"a0", "o0"}, {"b0", "y0"}, {"c0", "z0"}, op->SetInput("Y", {"b0"});
// }; op->SetInput("Z", {"c0", "c1"});
// EXPECT_TRUE(expects == in_to_outs); op->SetOutput("Out", {"o0"});
// } op->SetOutput("YOut", {"y0"});
// op->SetOutput("ZOut", {"z0"});
// TEST(InferInplace, MultiGradInplaceInToOut) {
// ProgramDesc prog; prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR);
// auto* op = prog.MutableBlock(0)->AppendOp(); prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR);
// op->SetType("multi_out_grad"); prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR);
// op->SetInput(GradVarName("Out"), {"o0"}); prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR);
// op->SetInput(GradVarName("YOut"), {"y0"}); prog.MutableBlock(0)->Var("o0");
// op->SetInput(GradVarName("ZOut"), {"z0"}); prog.MutableBlock(0)->Var("y0");
// op->SetOutput(GradVarName("X"), {"a0", "a1"}); prog.MutableBlock(0)->Var("z0");
// op->SetOutput(GradVarName("Y"), {"b0"}); prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024});
// op->SetOutput(GradVarName("Z"), {"c0", "c1"}); prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024});
// prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR);
// prog.MutableBlock(0)->Var("o0"); std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
// prog.MutableBlock(0)->Var("y0"); std::unique_ptr<details::InplacePass> pass(new details::InplacePass());
// prog.MutableBlock(0)->Var("z0"); pass->Apply(g.get());
// prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); auto op_node = GetNodeFromGraph(g.get(), "multi_out_op");
// prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); ASSERT_TRUE(op_node != nullptr);
// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); EXPECT_EQ(op_node->outputs[0]->Name(), "a0");
// prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); EXPECT_EQ(op_node->outputs[1]->Name(), "b0");
// prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); EXPECT_EQ(op_node->outputs[2]->Name(), "c0");
// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); }
//
// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; TEST(InferInplace, MultiGradInplaceInToOut) {
// auto in_to_outs = infer_inplace(*op); ProgramDesc prog;
// auto* op = prog.MutableBlock(0)->AppendOp();
// EXPECT_EQ(in_to_outs.size(), 3ul); op->SetType("multi_out_grad");
// std::unordered_map<std::string, std::string> expects = { op->SetInput(GradVarName("Out"), {"o0"});
// {"o0", "a0"}, {"y0", "b0"}, {"z0", "c0"}, op->SetInput(GradVarName("YOut"), {"y0"});
// }; op->SetInput(GradVarName("ZOut"), {"z0"});
// EXPECT_TRUE(expects == in_to_outs); op->SetOutput(GradVarName("X"), {"a0", "a1"});
// } op->SetOutput(GradVarName("Y"), {"b0"});
op->SetOutput(GradVarName("Z"), {"c0", "c1"});
prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("o0");
prog.MutableBlock(0)->Var("y0");
prog.MutableBlock(0)->Var("z0");
prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("z0")->SetShape({32, 15, 1024, 1024});
std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
std::unique_ptr<details::InplacePass> pass(new details::InplacePass());
pass->Apply(g.get());
auto op_node = GetNodeFromGraph(g.get(), "multi_out_grad");
ASSERT_TRUE(op_node != nullptr);
EXPECT_EQ(op_node->outputs[0]->Name(), "o0");
EXPECT_EQ(op_node->outputs[2]->Name(), "y0");
EXPECT_EQ(op_node->outputs[3]->Name(), "c0");
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
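For reference, the two fixtures above differ only in the shape given to test2_out: FakeSuccData keeps the element count of test2_a ({32, 64, 128, 128} vs {64, 32, 128, 128}), while FakeNoInplaceData shrinks it to {64, 31, 128, 128}, which is presumably why the pass leaves the output name untouched in the second test instead of reusing test2_a's buffer. A standalone check of the element counts (the Numel helper is illustrative, not part of the patch):

#include <cstdint>
#include <initializer_list>
#include <iostream>

// Illustrative helper: multiply out a shape to compare the fixtures above.
int64_t Numel(std::initializer_list<int64_t> dims) {
  int64_t n = 1;
  for (int64_t d : dims) n *= d;
  return n;
}

int main() {
  std::cout << Numel({32, 64, 128, 128}) << "\n";  // 33554432, test2_a
  std::cout << Numel({64, 32, 128, 128}) << "\n";  // 33554432, FakeSuccData output
  std::cout << Numel({64, 31, 128, 128}) << "\n";  // 32505856, FakeNoInplaceData output
  return 0;
}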
...@@ -68,21 +68,12 @@ pass_library(transpose_flatten_concat_fuse_pass inference) ...@@ -68,21 +68,12 @@ pass_library(transpose_flatten_concat_fuse_pass inference)
pass_library(identity_scale_op_clean_pass base) pass_library(identity_scale_op_clean_pass base)
pass_library(sync_batch_norm_pass base) pass_library(sync_batch_norm_pass base)
pass_library(runtime_context_cache_pass base) pass_library(runtime_context_cache_pass base)
pass_library(simplify_anakin_detection_pattern_pass inference) pass_library(quant_conv2d_dequant_fuse_pass inference)
pass_library(anakin_fillconstant_elementwisemul_fuse inference) pass_library(fillconstant_elementwisemul_fuse inference)
# There may be many transpose-flatten structures in a model, and the output of if(ANAKIN_FOUND)
# these structures will be used as inputs to the concat Op. This pattern will pass_library(simplify_anakin_priorbox_detection_out_pass inference)
# be detected by our pass. The index here represents the number of structures in the endif()
# pattern. We use index 3 ~ 6, because these quantities of structures are
# common in the models.
foreach (index RANGE 2 6)
file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n")
endforeach()
foreach (index RANGE 2 6)
file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n")
endforeach()
if(WITH_MKLDNN) if(WITH_MKLDNN)
pass_library(mkldnn_placement_pass base mkldnn) pass_library(mkldnn_placement_pass base mkldnn)
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h" #include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace paddle { namespace paddle {
...@@ -29,8 +29,8 @@ namespace ir { ...@@ -29,8 +29,8 @@ namespace ir {
GET_IR_NODE(elementwise_mul); \ GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out); GET_IR_NODE(elementwise_mul_out);
void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse"; const std::string pattern_name = "fillconstant_elementwisemul_fuse";
FusePassBase::Init(pattern_name, graph); FusePassBase::Init(pattern_name, graph);
GraphPatternDetector gpd; GraphPatternDetector gpd;
...@@ -39,8 +39,8 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { ...@@ -39,8 +39,8 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
->assert_is_op_input("elementwise_mul", "X") ->assert_is_op_input("elementwise_mul", "X")
->AsInput(); ->AsInput();
patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(), patterns::FillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
pattern_name); pattern_name);
pattern(x); pattern(x);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
...@@ -79,5 +79,5 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { ...@@ -79,5 +79,5 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse, REGISTER_PASS(fillconstant_elementwisemul_fuse,
paddle::framework::ir::AnakinFillconstantElementwisemulFuse); paddle::framework::ir::FillconstantElementwisemulFuse);
...@@ -21,9 +21,9 @@ namespace paddle { ...@@ -21,9 +21,9 @@ namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
class AnakinFillconstantElementwisemulFuse : public FusePassBase { class FillconstantElementwisemulFuse : public FusePassBase {
public: public:
virtual ~AnakinFillconstantElementwisemulFuse() {} virtual ~FillconstantElementwisemulFuse() {}
protected: protected:
void ApplyImpl(ir::Graph* graph) const override; void ApplyImpl(ir::Graph* graph) const override;
......
...@@ -1471,7 +1471,8 @@ PDNode *patterns::TransposeFlattenConcat::operator()( ...@@ -1471,7 +1471,8 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
} }
PDNode *patterns::AnakinDetectionPattern::operator()( PDNode *patterns::AnakinDetectionPattern::operator()(
std::vector<PDNode *> conv_in, int times) { std::vector<PDNode *> conv_in, int times, std::string priorbox_type,
bool is_reshape) {
// The times represents the repeat times of the // The times represents the repeat times of the
// {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape} // {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape}
const int kNumFields = 7; const int kNumFields = 7;
...@@ -1486,37 +1487,38 @@ PDNode *patterns::AnakinDetectionPattern::operator()( ...@@ -1486,37 +1487,38 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
const int kMultiClassSecondInputNmsOffset = times + 1; const int kMultiClassSecondInputNmsOffset = times + 1;
std::vector<PDNode *> nodes; std::vector<PDNode *> nodes;
std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2";
for (int i = 0; i < times; i++) { for (int i = 0; i < times; i++) {
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("prior_box" + std::to_string(i))) pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
->assert_is_op("density_prior_box")); ->assert_is_op(priorbox_type));
nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i))) nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Boxes") ->assert_is_op_output(priorbox_type, "Boxes")
->assert_is_op_input("reshape2", "X") ->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate()); ->AsIntermediate());
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("reshape1" + std::to_string(i))) pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
->assert_is_op("reshape2")); ->assert_is_op(op_after_priorbox));
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i))) pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
->assert_is_op_output("reshape2") ->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i) ->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate()); ->AsIntermediate());
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i))) pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Variances") ->assert_is_op_output(priorbox_type, "Variances")
->assert_is_op_input("reshape2", "X") ->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate()); ->AsIntermediate());
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("reshape2" + std::to_string(i))) pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
->assert_is_op("reshape2")); ->assert_is_op(op_after_priorbox));
nodes.push_back( nodes.push_back(
pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i))) pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
->assert_is_op_output("reshape2") ->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i) ->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate()); ->AsIntermediate());
} }
...@@ -1612,7 +1614,7 @@ PDNode *patterns::AnakinDetectionPattern::operator()( ...@@ -1612,7 +1614,7 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
return multiclass_nms_out; return multiclass_nms_out;
} }
PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()( PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
PDNode *elementwise_op_input) { PDNode *elementwise_op_input) {
auto fill_constant = auto fill_constant =
pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant"); pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");
...@@ -1635,6 +1637,76 @@ PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()( ...@@ -1635,6 +1637,76 @@ PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()(
return elementwise_mul_out; return elementwise_mul_out;
} }
void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
const std::string &op_type,
const std::string &weight_name,
int times) {
const int kNumFields = 5;
const int kQuantizedWeightOffset = 0;
const int kQuantizedOpOffset = 1;
const int kQuantizedOpOutOffset = 2;
const int kDequantOpOffset = 3;
const int kDequantOpOutOffset = 4;
  // there is always exactly one quant op.
auto quant_op_in_scale =
pattern->NewNode(GetNodeName("quant_op_in_scale"))
->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
->AsInput();
auto quant_op = pattern->NewNode(GetNodeName("quant_op"))
->assert_is_op("fake_quantize_range_abs_max");
auto quant_op_out_scale =
pattern->NewNode(GetNodeName("quant_op_out_scale"))
->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
->assert_is_op_input("fake_dequantize_max_abs", "Scale")
->AsIntermediate();
auto quant_op_out =
pattern->NewNode(GetNodeName("quant_op_out"))
->assert_is_op_output("fake_quantize_range_abs_max", "Out")
->assert_is_op_input(op_type)
->AsIntermediate();
  // there are 'times' pairs of quantized and dequant ops
std::vector<PDNode *> nodes;
for (int i = 0; i < times; i++) {
nodes.push_back(
pattern->NewNode(GetNodeName("quantized_op_weight") + std::to_string(i))
->assert_is_op_input(op_type, weight_name)
->AsInput());
nodes.push_back(
pattern->NewNode(GetNodeName("quantized_op") + std::to_string(i))
->assert_is_op(op_type));
nodes.push_back(
pattern->NewNode(GetNodeName("quantized_op_out") + std::to_string(i))
->assert_is_op_output(op_type)
->assert_is_op_input("fake_dequantize_max_abs", "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("dequant_op") + std::to_string(i))
->assert_is_op("fake_dequantize_max_abs"));
nodes.push_back(
pattern->NewNode(GetNodeName("dequant_op_out") + std::to_string(i))
->assert_is_op_output("fake_dequantize_max_abs", "Out")
->AsOutput());
}
quant_op->LinksFrom({quant_op_input, quant_op_in_scale});
quant_op_out->LinksFrom({quant_op});
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kQuantizedOpOffset]->LinksFrom(
{quant_op_out, nodes[i * kNumFields + kQuantizedWeightOffset]});
nodes[i * kNumFields + kQuantizedOpOutOffset]->LinksFrom(
{nodes[i * kNumFields + kQuantizedOpOffset]});
nodes[i * kNumFields + kDequantOpOffset]->LinksFrom(
{nodes[i * kNumFields + kQuantizedOpOutOffset], quant_op_out_scale});
nodes[i * kNumFields + kDequantOpOutOffset]->LinksFrom(
{nodes[i * kNumFields + kDequantOpOffset]});
}
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
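The pattern above keeps the per-repeat nodes in a single flat vector, so repeat i's node for a given role lives at i * kNumFields + that role's offset; the linking loop at the end relies on exactly this layout. A tiny standalone sketch of the indexing (constants mirror the ones above):

#include <cassert>

int main() {
  const int kNumFields = 5;          // weight, quantized op, its out, dequant op, its out
  const int kDequantOpOutOffset = 4;
  const int times = 3;               // three quantized/dequant repeats
  // The flat vector holds times * kNumFields nodes; the dequant output of
  // the last repeat sits at (times - 1) * kNumFields + kDequantOpOutOffset.
  assert(times * kNumFields == 15);
  assert((times - 1) * kNumFields + kDequantOpOutOffset == 14);
  return 0;
}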
...@@ -848,7 +848,8 @@ struct AnakinDetectionPattern : public PatternBase { ...@@ -848,7 +848,8 @@ struct AnakinDetectionPattern : public PatternBase {
AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope) AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "anakin_detect_pattern") {} : PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
PDNode* operator()(std::vector<PDNode*> conv_inputs, int times); PDNode* operator()(std::vector<PDNode*> conv_inputs, int times,
std::string priorbox_type, bool is_reshape);
std::string GetNodeName(const std::string& op_type) { std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type); return PDNodeName(name_scope_, repr_, id_, op_type);
...@@ -859,9 +860,9 @@ struct AnakinDetectionPattern : public PatternBase { ...@@ -859,9 +860,9 @@ struct AnakinDetectionPattern : public PatternBase {
} }
}; };
struct AnakinFillConstantElementWiseMulFuse : public PatternBase { struct FillConstantElementWiseMulFuse : public PatternBase {
AnakinFillConstantElementWiseMulFuse(PDPattern* pattern, FillConstantElementWiseMulFuse(PDPattern* pattern,
const std::string& name_scope) const std::string& name_scope)
: PatternBase(pattern, name_scope, : PatternBase(pattern, name_scope,
"anakin_fillconstant_elementwisemul_fuse") {} "anakin_fillconstant_elementwisemul_fuse") {}
...@@ -874,6 +875,22 @@ struct AnakinFillConstantElementWiseMulFuse : public PatternBase { ...@@ -874,6 +875,22 @@ struct AnakinFillConstantElementWiseMulFuse : public PatternBase {
PATTERN_DECL_NODE(elementwise_mul_out); PATTERN_DECL_NODE(elementwise_mul_out);
}; };
struct QuantDequantOpFuse : public PatternBase {
QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "quant_dequant_fuse") {}
void operator()(PDNode* quant_op_input, const std::string& op_name,
const std::string& weight_name, int times = 1);
std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type);
}
PDNode* GetPDNode(const std::string& op_type) {
return pattern->RetrieveNode(GetNodeName(op_type));
}
};
} // namespace patterns } // namespace patterns
// Link two ir::Nodes from each other. // Link two ir::Nodes from each other.
......
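Wiring the new QuantDequantOpFuse pattern follows the other PatternBase helpers: build it against a GraphPatternDetector's mutable pattern and invoke it with the quantizer's input node, the quantized op type, that op's weight slot, and the repeat count. A short sketch in the style of RunQuantDequant in the pass below (the caller is assumed to own the detector and the input PDNode):

#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

// Sketch only: mirrors how RunQuantDequant builds the pattern for conv2d.
void BuildQuantDequantPattern(paddle::framework::ir::GraphPatternDetector* gpd,
                              paddle::framework::ir::PDNode* x, int times) {
  namespace patterns = paddle::framework::ir::patterns;
  patterns::QuantDequantOpFuse pattern(gpd->mutable_pattern(),
                                       "quant_dequant_fuse");
  pattern(x, "conv2d", "Filter", times);
}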
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h"
namespace paddle {
namespace framework {
namespace ir {
void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
std::string op_type) {
const std::string pattern_name = "quant_dequant_fuse";
// FusePassBase::Init(pattern_name, graph);
const int kNumFields = 5;
const int kQuantizedWeightOffset = 0;
const int kQuantizedOpOffset = 1;
const int kQuantizedOpOutOffset = 2;
const int kDequantOpOffset = 3;
const int kDequantOpOutOffset = 4;
GraphPatternDetector gpd;
auto* x = gpd.mutable_pattern()
->NewNode("x")
->assert_is_op_input("fake_quantize_range_abs_max", "X")
->AsInput();
std::string quantized_op_type = "";
std::string weight_name = "";
if (op_type == "conv2d") {
quantized_op_type = "conv2d";
weight_name = "Filter";
} else if (op_type == "conv2d_fusion") {
quantized_op_type = "conv2d_fusion";
weight_name = "Filter";
} else if (op_type == "mul") {
quantized_op_type = "mul";
weight_name = "Y";
} else if (op_type == "fc") {
quantized_op_type = "fc";
weight_name = "W";
} else {
    PADDLE_THROW(
        "QuantDequantFuse: We only support conv2d, conv2d_fusion, fc, mul for "
        "now.");
}
patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
pattern(x, quantized_op_type, weight_name, times);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
PADDLE_ENFORCE(subgraph.count(x));
auto* input_node = subgraph.at(x);
Node* quant_op_in_scale =
subgraph.at(pattern.GetPDNode("quant_op_in_scale"));
Node* quant_op = subgraph.at(pattern.GetPDNode("quant_op"));
Node* quant_op_out_scale =
subgraph.at(pattern.GetPDNode("quant_op_out_scale"));
Node* quant_op_out = subgraph.at(pattern.GetPDNode("quant_op_out"));
std::vector<Node*> nodes;
for (int i = 0; i < times; i++) {
nodes.push_back(subgraph.at(
pattern.GetPDNode("quantized_op_weight" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("quantized_op" + std::to_string(i))));
nodes.push_back(subgraph.at(
pattern.GetPDNode("quantized_op_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("dequant_op" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("dequant_op_out" + std::to_string(i))));
}
int bit_length = boost::get<int>(quant_op->Op()->GetAttr("bit_length"));
int range = ((1 << (bit_length - 1)) - 1);
// Prepare input scale
std::string input_scale_var_name = quant_op->Op()->Input("InScale").front();
PADDLE_ENFORCE(scope);
const LoDTensor& input_scale_tensor =
scope->FindVar(input_scale_var_name)->Get<LoDTensor>();
PADDLE_ENFORCE(paddle::platform::is_cpu_place(input_scale_tensor.place()));
const float* input_scale_data = input_scale_tensor.data<float>();
float input_scale = input_scale_data[0];
std::unordered_set<const Node*> delete_nodes;
for (int i = 0; i < times; i++) {
// max_range = (range * range) / weight_scale
float max_range = boost::get<float>(
nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
float weight_scale = (range * range) / max_range;
auto base_op_desc =
*nodes[i * kNumFields + kQuantizedOpOffset]->Op()->Proto();
std::string new_input = input_node->Name();
std::string new_output =
nodes[i * kNumFields + kDequantOpOutOffset]->Name();
framework::OpDesc new_op_desc(base_op_desc, nullptr);
new_op_desc.SetType(quantized_op_type);
if (quantized_op_type == "conv2d" ||
quantized_op_type == "conv2d_fusion") {
new_op_desc.SetInput("Input", {new_input});
new_op_desc.SetOutput("Output", {new_output});
} else if (quantized_op_type == "fc") {
new_op_desc.SetInput("Input", {new_input});
new_op_desc.SetOutput("Out", {new_output});
} else if (quantized_op_type == "mul") {
new_op_desc.SetInput("X", {new_input});
new_op_desc.SetOutput("Out", {new_output});
}
new_op_desc.SetAttr("enable_int8", true);
new_op_desc.SetAttr("input_scale", input_scale);
new_op_desc.SetAttr("weight_scale", weight_scale);
new_op_desc.Flush();
auto* new_op = graph->CreateOpNode(&new_op_desc);
IR_NODE_LINK_TO(input_node, new_op);
IR_NODE_LINK_TO(nodes[i * kNumFields + kQuantizedWeightOffset], new_op);
IR_NODE_LINK_TO(new_op, nodes[i * kNumFields + kDequantOpOutOffset]);
delete_nodes.insert(nodes[i * kNumFields + kQuantizedOpOffset]);
delete_nodes.insert(nodes[i * kNumFields + kQuantizedOpOutOffset]);
delete_nodes.insert(nodes[i * kNumFields + kDequantOpOffset]);
}
delete_nodes.insert(quant_op_in_scale);
delete_nodes.insert(quant_op);
delete_nodes.insert(quant_op_out);
delete_nodes.insert(quant_op_out_scale);
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph, delete_nodes);
};
gpd(graph, handler);
}
void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
const std::string pattern_name = "quant_dequant_fuse";
FusePassBase::Init(pattern_name, graph);
std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
auto* scope = param_scope();
for (auto& op_type : quantized_op_types) {
for (int i = 1; i <= 6; i++) {
RunQuantDequant(graph, scope, i, op_type);
}
}
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(quant_conv2d_dequant_fuse_pass,
paddle::framework::ir::QuantDequantFusePass);
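The scale recovery in RunQuantDequant simply inverts how fake_dequantize_max_abs folded the weight scale into its max_range attribute: max_range = range^2 / weight_scale with range = 2^(bit_length - 1) - 1, so weight_scale = range^2 / max_range. A standalone sketch with illustrative numbers (bit_length = 8 and an original weight scale of 0.5 are assumptions, not values taken from the patch):

#include <iostream>

int main() {
  // Same arithmetic as RunQuantDequant above.
  const int bit_length = 8;                         // from fake_quantize_range_abs_max
  const int range = (1 << (bit_length - 1)) - 1;    // 127 for int8
  const float original_weight_scale = 0.5f;         // assumed for illustration
  const float max_range = (range * range) / original_weight_scale;  // 32258
  const float weight_scale = (range * range) / max_range;           // recovers 0.5
  std::cout << range << " " << max_range << " " << weight_scale << "\n";
  return 0;
}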
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
class QuantDequantFusePass : public FusePassBase {
public:
virtual ~QuantDequantFusePass() {}
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
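Driving the new pass directly looks like the inplace pass usage in the test file earlier in this change: construct it and call Apply on the graph. A minimal sketch (the pass reads param_scope(), so the caller is assumed to have attached the parameter scope attribute to the graph beforehand):

#include <memory>
#include "paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h"

// Sketch only: apply the fuse pass to an existing graph.
void RunQuantDequantFusePass(paddle::framework::ir::Graph* graph) {
  std::unique_ptr<paddle::framework::ir::QuantDequantFusePass> pass(
      new paddle::framework::ir::QuantDequantFusePass());
  pass->Apply(graph);
}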
...@@ -17,25 +17,24 @@ ...@@ -17,25 +17,24 @@
#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h" #include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
template <int times> void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density,
void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl( bool is_reshape) {
ir::Graph *graph) const {
const std::string pattern_name = const std::string pattern_name =
"simplify_anakin_detection_pattern_pass" + std::to_string(times); "simplify_anakin_detection_pattern_pass" + std::to_string(times);
FusePassBase::Init(pattern_name, graph); std::string priorbox_type = is_density ? "density_prior_box" : "prior_box";
GraphPatternDetector gpd; GraphPatternDetector gpd;
std::vector<PDNode *> input_nodes; std::vector<PDNode *> input_nodes;
for (int i = 0; i < times; i++) { for (int i = 0; i < times; i++) {
input_nodes.push_back(gpd.mutable_pattern() input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(i)) ->NewNode("x" + std::to_string(i))
->assert_is_op_input("density_prior_box", "Input") ->assert_is_op_input(priorbox_type, "Input")
->AsInput()); ->AsInput());
} }
input_nodes.push_back(gpd.mutable_pattern() input_nodes.push_back(gpd.mutable_pattern()
...@@ -49,7 +48,7 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl( ...@@ -49,7 +48,7 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
->AsInput()); ->AsInput());
patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name); patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
pattern(input_nodes, times); pattern(input_nodes, times, priorbox_type, is_reshape);
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
...@@ -119,8 +118,7 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl( ...@@ -119,8 +118,7 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type")); boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
bool box_normalized = bool box_normalized =
boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized")); boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
// auto variance =
// boost::get<std::vector<float>>(box_coder_op->Op()->GetAttr("variance"));
int background_label = int background_label =
boost::get<int>(multiclass_nms->Op()->GetAttr("background_label")); boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
float score_threshold = float score_threshold =
...@@ -138,7 +136,6 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl( ...@@ -138,7 +136,6 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
nodes[i * kNumFields + kPriorBoxLocOffset]->Name()); nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
} }
// int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
framework::OpDesc concat1_desc; framework::OpDesc concat1_desc;
concat1_desc.SetType("concat"); concat1_desc.SetType("concat");
concat1_desc.SetInput("X", concat1_input_names); concat1_desc.SetInput("X", concat1_input_names);
...@@ -213,31 +210,24 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl( ...@@ -213,31 +210,24 @@ void SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
gpd(graph, handler); gpd(graph, handler);
} }
template class SimplifyAnakinDetectionPatternPass<1>; void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const {
template class SimplifyAnakinDetectionPatternPass<2>; const int pattern_nums = 6;
template class SimplifyAnakinDetectionPatternPass<3>; const std::string pattern_name = "simplify_anakin_detection_pattern_pass";
template class SimplifyAnakinDetectionPatternPass<4>; FusePassBase::Init(pattern_name, graph);
template class SimplifyAnakinDetectionPatternPass<5>; std::vector<bool> options = {true, false};
template class SimplifyAnakinDetectionPatternPass<6>; for (const auto &is_density : options) {
for (const auto &is_reshape : options) {
for (int i = 1; i <= pattern_nums; i++) {
RunSimplifyAnakinDetection(graph, i, is_density, is_reshape);
}
}
}
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
REGISTER_PASS(simplify_anakin_detection_pattern_pass, typedef paddle::framework::ir::SimplifyAnakinDetectionPatternPass
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>); priorbox_pattern;
REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern);
REGISTER_PASS(simplify_anakin_detection_pattern_pass2,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<2>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass3,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass4,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass5,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass6,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>);
...@@ -26,7 +26,6 @@ namespace ir { ...@@ -26,7 +26,6 @@ namespace ir {
// these structures will be used as inputs to the concat Op. This pattern will // these structures will be used as inputs to the concat Op. This pattern will
// be detected by our pass. The times here represents the repeat times of this // be detected by our pass. The times here represents the repeat times of this
// structure. // structure.
template <int times>
class SimplifyAnakinDetectionPatternPass : public FusePassBase { class SimplifyAnakinDetectionPatternPass : public FusePassBase {
public: public:
virtual ~SimplifyAnakinDetectionPatternPass() {} virtual ~SimplifyAnakinDetectionPatternPass() {}
......
...@@ -25,11 +25,9 @@ namespace paddle { ...@@ -25,11 +25,9 @@ namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
template <int times> void RunTransposeFlattenConcatFuse(ir::Graph *graph, int times) {
void TransposeFlattenConcatFusePass<times>::ApplyImpl(ir::Graph *graph) const {
const std::string pattern_name = const std::string pattern_name =
"transpose_flatten" + std::to_string(times) + "_concat_fuse"; "transpose_flatten" + std::to_string(times) + "_concat_fuse";
FusePassBase::Init(pattern_name, graph);
GraphPatternDetector gpd; GraphPatternDetector gpd;
std::vector<PDNode *> input_nodes; std::vector<PDNode *> input_nodes;
...@@ -122,31 +120,18 @@ void TransposeFlattenConcatFusePass<times>::ApplyImpl(ir::Graph *graph) const { ...@@ -122,31 +120,18 @@ void TransposeFlattenConcatFusePass<times>::ApplyImpl(ir::Graph *graph) const {
gpd(graph, handler); gpd(graph, handler);
} }
template class TransposeFlattenConcatFusePass<1>; void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
template class TransposeFlattenConcatFusePass<2>; const int pattern_nums = 6;
template class TransposeFlattenConcatFusePass<3>; const std::string pattern_name = "transpose_flatten_concat_fuse";
template class TransposeFlattenConcatFusePass<4>; FusePassBase::Init(pattern_name, graph);
template class TransposeFlattenConcatFusePass<5>; for (int i = 1; i <= pattern_nums; i++) {
template class TransposeFlattenConcatFusePass<6>; RunTransposeFlattenConcatFuse(graph, i);
}
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
REGISTER_PASS(transpose_flatten_concat_fuse_pass, REGISTER_PASS(transpose_flatten_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<1>); paddle::framework::ir::TransposeFlattenConcatFusePass);
REGISTER_PASS(transpose_flatten2_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<2>);
REGISTER_PASS(transpose_flatten3_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<3>);
REGISTER_PASS(transpose_flatten4_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<4>);
REGISTER_PASS(transpose_flatten5_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<5>);
REGISTER_PASS(transpose_flatten6_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass<6>);
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...@@ -24,7 +26,6 @@ namespace ir { ...@@ -24,7 +26,6 @@ namespace ir {
// these structures will be used as inputs to the concat Op. This pattern will // these structures will be used as inputs to the concat Op. This pattern will
// be detected by our pass. The times here represents the repeat times of this // be detected by our pass. The times here represents the repeat times of this
// structure. // structure.
template <int times>
class TransposeFlattenConcatFusePass : public FusePassBase { class TransposeFlattenConcatFusePass : public FusePassBase {
public: public:
virtual ~TransposeFlattenConcatFusePass() {} virtual ~TransposeFlattenConcatFusePass() {}
......
...@@ -56,8 +56,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) { ...@@ -56,8 +56,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
} }
} }
static DDim GetDims(const Scope& scope, const std::string& name, static DDim GetDimsDebug(const Scope& scope, const std::string& name,
bool get_actual_dim = false) { bool get_actual_dim = false) {
Variable* var = scope.FindVar(name); Variable* var = scope.FindVar(name);
if (var == nullptr) { if (var == nullptr) {
return DDim({-1}); return DDim({-1});
...@@ -65,9 +65,9 @@ static DDim GetDims(const Scope& scope, const std::string& name, ...@@ -65,9 +65,9 @@ static DDim GetDims(const Scope& scope, const std::string& name,
if (var->IsType<LoDTensor>()) { if (var->IsType<LoDTensor>()) {
const LoDTensor& tensor = var->Get<LoDTensor>(); const LoDTensor& tensor = var->Get<LoDTensor>();
// if (UNLIKELY(!tensor.IsInitialized())) { if (UNLIKELY(!tensor.IsInitialized())) {
// return DDim({-1}); return DDim({-1});
// } }
return tensor.dims(); return tensor.dims();
} else if (var->IsType<SelectedRows>()) { } else if (var->IsType<SelectedRows>()) {
if (get_actual_dim) { if (get_actual_dim) {
...@@ -123,7 +123,7 @@ static int GetRowSize(const Scope& scope, const std::string& name) { ...@@ -123,7 +123,7 @@ static int GetRowSize(const Scope& scope, const std::string& name) {
return -1; return -1;
} }
static LoD GetLoD(const Scope& scope, const std::string& name) { static LoD GetLoDDebug(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name); Variable* var = scope.FindVar(name);
auto default_lod = LoD({{}}); auto default_lod = LoD({{}});
...@@ -133,9 +133,9 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { ...@@ -133,9 +133,9 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
if (var->IsType<LoDTensor>()) { if (var->IsType<LoDTensor>()) {
const LoDTensor& tensor = var->Get<LoDTensor>(); const LoDTensor& tensor = var->Get<LoDTensor>();
// if (UNLIKELY(!tensor.IsInitialized())) { if (UNLIKELY(!tensor.IsInitialized())) {
// return default_lod; return default_lod;
// } }
return tensor.lod(); return tensor.lod();
} else { } else {
return default_lod; return default_lod;
...@@ -274,8 +274,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -274,8 +274,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
} }
std::string dtype = GetDtype(*scope, var_name); std::string dtype = GetDtype(*scope, var_name);
ss << ":" << dtype; ss << ":" << dtype;
ss << "[" << GetDims(*scope, var_name, true) << "]"; ss << "[" << GetDimsDebug(*scope, var_name, true) << "]";
ss << "(" << GetLoD(*scope, var_name) << ")"; ss << "(" << GetLoDDebug(*scope, var_name) << ")";
} }
} }
if (i != input.second.size() - 1) { if (i != input.second.size() - 1) {
...@@ -305,8 +305,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -305,8 +305,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
} }
std::string dtype = GetDtype(*scope, output.second[i]); std::string dtype = GetDtype(*scope, output.second[i]);
ss << ":" << dtype; ss << ":" << dtype;
ss << "[" << GetDims(*scope, var_name, true) << "]"; ss << "[" << GetDimsDebug(*scope, var_name, true) << "]";
ss << "(" << GetLoD(*scope, var_name) << ")"; ss << "(" << GetLoDDebug(*scope, var_name) << ")";
} }
} }
if (i != output.second.size() - 1) { if (i != output.second.size() - 1) {
...@@ -1017,7 +1017,7 @@ Scope* OperatorWithKernel::PrepareData( ...@@ -1017,7 +1017,7 @@ Scope* OperatorWithKernel::PrepareData(
// of search key even though the set is empty. // of search key even though the set is empty.
if (!no_buffer_ins.empty() && if (!no_buffer_ins.empty() &&
no_buffer_ins.count(var_name_item.first) > 0) { no_buffer_ins.count(var_name_item.first) > 0) {
VLOG(1) << "Skip scanning input " << var_name_item.first VLOG(7) << "Skip scanning input " << var_name_item.first
<< " in Operator " << type_; << " in Operator " << type_;
continue; continue;
} }
......
...@@ -34,25 +34,41 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, ...@@ -34,25 +34,41 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
auto input_name = op_desc.Input("Input").front(); auto input_name = op_desc.Input("Input").front();
auto image_name = op_desc.Input("Image").front(); auto image_name = op_desc.Input("Image").front();
auto output_name = op_desc.Output("Boxes").front(); auto output_name = op_desc.Output("Boxes").front();
auto op_type = op_desc.Type();
auto op_name = op_type + ":" + op_desc.Output("Boxes").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Boxes").front(); // only for density_prior_box
std::vector<float> fixed_sizes = {};
std::vector<float> fixed_ratios = {};
std::vector<int> densities = {};
auto fixed_sizes = std::vector<float> min_sizes = {};
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes")); std::vector<float> max_sizes = {};
auto fixed_ratios = std::vector<float> aspect_ratios = {};
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios")); bool is_clip = false;
auto densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities")); bool is_flip = false;
if (op_type == "density_prior_box") {
fixed_sizes =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
fixed_ratios =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
} else if (op_type == "prior_box") {
min_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("min_sizes"));
max_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("max_sizes"));
aspect_ratios =
boost::get<std::vector<float>>(op_desc.GetAttr("aspect_ratios"));
is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
is_flip = boost::get<bool>(op_desc.GetAttr("flip"));
}
std::vector<float> dens; std::vector<float> dens;
for (auto& ele : densities) { for (auto& ele : densities) {
dens.push_back(static_cast<float>(ele)); dens.push_back(static_cast<float>(ele));
} }
// lack flip
// auto clip = boost::get<bool>(op_desc.GetAttr("clip"));
auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances")); auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));
for (auto& ele : variances) {
    VLOG(4) << "prior_box variance: " << ele;
}
// lack img_h, img_w // lack img_h, img_w
auto step_h = boost::get<float>(op_desc.GetAttr("step_h")); auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
...@@ -66,14 +82,14 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, ...@@ -66,14 +82,14 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
std::vector<float> temp_v = {}; std::vector<float> temp_v = {};
engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name}); engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", temp_v); engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", min_sizes);
engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", temp_v); engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", max_sizes);
engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", temp_v); engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", aspect_ratios);
engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes); engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios); engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens); engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
engine_->AddOpAttr(op_name, "is_flip", static_cast<bool>(false)); engine_->AddOpAttr(op_name, "is_flip", is_flip);
engine_->AddOpAttr(op_name, "is_clip", static_cast<bool>(false)); engine_->AddOpAttr(op_name, "is_clip", is_clip);
engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances); engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0)); engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0)); engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
...@@ -88,3 +104,4 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, ...@@ -88,3 +104,4 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
} // namespace paddle } // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter); REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
...@@ -48,7 +48,7 @@ class AnakinOpConverter { ...@@ -48,7 +48,7 @@ class AnakinOpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
std::string op_type = op_desc.Type(); std::string op_type = op_desc.Type();
AnakinOpConverter *it = nullptr; AnakinOpConverter *it = nullptr;
if (op_type == "depthwise_conv2d") op_type = "conv2d";
if (op_type == "reshape2") op_type = "reshape"; if (op_type == "reshape2") op_type = "reshape";
if (op_type == "transpose2") op_type = "transpose"; if (op_type == "transpose2") op_type = "transpose";
if (op_type == "flatten2") op_type = "flatten"; if (op_type == "flatten2") op_type = "flatten";
......
...@@ -42,6 +42,8 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -42,6 +42,8 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("dropout"); teller_set.insert("dropout");
teller_set.insert("sigmoid"); teller_set.insert("sigmoid");
teller_set.insert("sum"); teller_set.insert("sum");
teller_set.insert("depthwise_conv2d");
teller_set.insert("prior_box");
} }
bool operator()(const std::string& op_type, bool operator()(const std::string& op_type,
......
...@@ -37,14 +37,14 @@ using framework::ir::Node; ...@@ -37,14 +37,14 @@ using framework::ir::Node;
void analysis::AnakinSubgraphPass::ApplyImpl( void analysis::AnakinSubgraphPass::ApplyImpl(
framework::ir::Graph *graph) const { framework::ir::Graph *graph) const {
framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get()); framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);
auto teller = [](const framework::ir::Node *node) { auto teller = [](const framework::ir::Node *node) {
if (!node->IsOp() || !node->Op()) return false; if (!node->IsOp() || !node->Op()) return false;
return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
}; };
SubGraphFuser fuser(graph.get(), teller, 6 /* min_subgraph_size */); SubGraphFuser fuser(graph, teller, 6 /* min_subgraph_size */);
fuser(); fuser();
std::vector<std::string> graph_param_names = std::vector<std::string> graph_param_names =
...@@ -56,10 +56,10 @@ void analysis::AnakinSubgraphPass::ApplyImpl( ...@@ -56,10 +56,10 @@ void analysis::AnakinSubgraphPass::ApplyImpl(
for (auto *node : graph->Nodes()) { for (auto *node : graph->Nodes()) {
if (node->IsOp() && !Agent(node).subgraph()->empty()) { if (node->IsOp() && !Agent(node).subgraph()->empty()) {
CreateAnakinOp(node, graph.get(), graph_param_names, &repetitive_params); CreateAnakinOp(node, graph, graph_param_names, &repetitive_params);
std::unordered_set<const Node *> nodes2remove( std::unordered_set<const Node *> nodes2remove(
Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
} }
} }
...@@ -69,7 +69,7 @@ void analysis::AnakinSubgraphPass::ApplyImpl( ...@@ -69,7 +69,7 @@ void analysis::AnakinSubgraphPass::ApplyImpl(
nodes2remove.insert(node); nodes2remove.insert(node);
} }
} }
framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
graph->Set(framework::ir::kRepetitiveParamAttr, graph->Set(framework::ir::kRepetitiveParamAttr,
new std::vector<std::string>(repetitive_params)); new std::vector<std::string>(repetitive_params));
} }
......
...@@ -192,6 +192,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp( ...@@ -192,6 +192,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
block_desc.Proto()->SerializeAsString()); block_desc.Proto()->SerializeAsString());
SetAttr(op_desc->Proto(), "max_batch_size", Get<int>("max_batch_size")); SetAttr(op_desc->Proto(), "max_batch_size", Get<int>("max_batch_size"));
SetAttr(op_desc->Proto(), "workspace_size", Get<int>("workspace_size")); SetAttr(op_desc->Proto(), "workspace_size", Get<int>("workspace_size"));
SetAttr(op_desc->Proto(), "gpu_id", Get<int>("gpu_device_id"));
SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);
SetAttr(op_desc->Proto(), "parameters", params); SetAttr(op_desc->Proto(), "parameters", params);
......
...@@ -52,6 +52,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { ...@@ -52,6 +52,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
for (auto &var_name : all_vars) { for (auto &var_name : all_vars) {
if (std::count(repetitive_params.begin(), repetitive_params.end(), if (std::count(repetitive_params.begin(), repetitive_params.end(),
var_name)) { var_name)) {
scope->EraseVars({var_name});
continue; continue;
} }
auto *var = scope->FindLocalVar(var_name); auto *var = scope->FindLocalVar(var_name);
......
...@@ -886,4 +886,5 @@ USE_ANAKIN_CONVERTER(detection_out); ...@@ -886,4 +886,5 @@ USE_ANAKIN_CONVERTER(detection_out);
USE_ANAKIN_CONVERTER(density_prior_box); USE_ANAKIN_CONVERTER(density_prior_box);
USE_ANAKIN_CONVERTER(dropout); USE_ANAKIN_CONVERTER(dropout);
USE_ANAKIN_CONVERTER(sum); USE_ANAKIN_CONVERTER(sum);
USE_ANAKIN_CONVERTER(prior_box);
#endif #endif
...@@ -70,17 +70,15 @@ void GpuPassStrategy::EnableMKLDNN() { ...@@ -70,17 +70,15 @@ void GpuPassStrategy::EnableMKLDNN() {
// The following passes work for the Anakin sub-graph engine. // The following passes work for the Anakin sub-graph engine.
const std::vector<std::string> kAnakinSubgraphPasses({ const std::vector<std::string> kAnakinSubgraphPasses({
"infer_clean_graph_pass", // "infer_clean_graph_pass", //
"simplify_anakin_detection_pattern_pass5", // "simplify_anakin_priorbox_detection_out_pass", //
"simplify_anakin_detection_pattern_pass4", // "fillconstant_elementwisemul_fuse", //
"simplify_anakin_detection_pattern_pass3", // "fc_fuse_pass", //
"simplify_anakin_detection_pattern_pass2", // "conv_elementwise_add_fuse_pass", //
"anakin_fillconstant_elementwisemul_fuse", // "conv_bn_fuse_pass", //
"fc_fuse_pass", // "conv_elementwise_add_fuse_pass", //
"conv_elementwise_add_fuse_pass", // "fc_gru_fuse_pass", //
"conv_bn_fuse_pass", // "quant_conv2d_dequant_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
"fc_gru_fuse_pass", //
"anakin_subgraph_pass", "anakin_subgraph_pass",
}); });
...@@ -97,13 +95,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { ...@@ -97,13 +95,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add2_act_fuse_pass", // "conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", // "conv_elementwise_add_fuse_pass", //
"runtime_context_cache_pass", // "runtime_context_cache_pass", //
#endif #endif //
"transpose_flatten_concat_fuse_pass",
}); });
for (int i = 6; i >= 2; i--) {
passes_.push_back("transpose_flatten" + std::to_string(i) +
"_concat_fuse_pass");
}
use_gpu_ = true; use_gpu_ = true;
} }
......
...@@ -23,6 +23,12 @@ function(inference_analysis_api_test target install_dir filename) ...@@ -23,6 +23,12 @@ function(inference_analysis_api_test target install_dir filename)
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt) ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
endfunction() endfunction()
function(inference_analysis_api_int8_test target model_dir data_dir filename)
inference_analysis_test(${target} SRCS ${filename}
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
ARGS --infer_model=${model_dir}/model --infer_data=${data_dir}/data.bin --batch_size=100)
endfunction()
function(inference_analysis_api_test_with_fake_data target install_dir filename model_name) function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)
download_model(${install_dir} ${model_name}) download_model(${install_dir} ${model_name})
inference_analysis_test(${target} SRCS ${filename} inference_analysis_test(${target} SRCS ${filename}
...@@ -138,6 +144,28 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 ...@@ -138,6 +144,28 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
"${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL) "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL)
# int8 image classification tests
if(WITH_MKLDNN)
set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8")
if (NOT EXISTS ${INT8_DATA_DIR})
inference_download_and_uncompress(${INT8_DATA_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "imagenet_val_100.tar.gz")
endif()
#resnet50 int8
set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR})
inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "resnet50_int8_model.tar.gz" )
endif()
inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)
#mobilenet int8
set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet")
if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR})
inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "mobilenetv1_int8_model.tar.gz" )
endif()
inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)
endif()
# bert, max_len=20, embedding_dim=128 # bert, max_len=20, embedding_dim=128
set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128") set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128")
download_model_and_data(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") download_model_and_data(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz")
......
...@@ -53,19 +53,6 @@ void Split(const std::string &line, char sep, std::vector<T> *v) { ...@@ -53,19 +53,6 @@ void Split(const std::string &line, char sep, std::vector<T> *v) {
} }
} }
template <typename T>
constexpr paddle::PaddleDType GetPaddleDType();
template <>
constexpr paddle::PaddleDType GetPaddleDType<int64_t>() {
return paddle::PaddleDType::INT64;
}
template <>
constexpr paddle::PaddleDType GetPaddleDType<float>() {
return paddle::PaddleDType::FLOAT32;
}
// Parse tensor from string // Parse tensor from string
template <typename T> template <typename T>
bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) { bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_int32(iterations, 0, "Number of iterations");
namespace paddle {
namespace inference {
namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model);
cfg->SetProgFile("__model__");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames(false);
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
cfg->EnableMKLDNN();
}
template <typename T>
class TensorReader {
public:
TensorReader(std::ifstream &file, size_t beginning_offset,
std::vector<int> shape, std::string name)
: file_(file), position(beginning_offset), shape_(shape), name_(name) {
numel =
std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<T>());
}
PaddleTensor NextBatch() {
PaddleTensor tensor;
tensor.name = name_;
tensor.shape = shape_;
tensor.dtype = GetPaddleDType<T>();
tensor.data.Resize(numel * sizeof(T));
file_.seekg(position);
file_.read(static_cast<char *>(tensor.data.data()), numel * sizeof(T));
position = file_.tellg();
if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
if (file_.fail())
throw std::runtime_error(name_ + ": failed reading file.");
return tensor;
}
protected:
std::ifstream &file_;
size_t position;
std::vector<int> shape_;
std::string name_;
size_t numel;
};
std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
const std::vector<std::vector<PaddleTensor>> &test_data, int num_images) {
int test_data_batch_size = test_data[0][0].shape[0];
CHECK_LE(static_cast<size_t>(num_images),
test_data.size() * test_data_batch_size);
PaddleTensor images;
images.name = "input";
images.shape = {num_images, 3, 224, 224};
images.dtype = PaddleDType::FLOAT32;
images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
PaddleTensor labels;
labels.name = "labels";
labels.shape = {num_images, 1};
labels.dtype = PaddleDType::INT64;
labels.data.Resize(sizeof(int64_t) * num_images);
for (int i = 0; i < num_images; i++) {
auto batch = i / test_data_batch_size;
auto element_in_batch = i % test_data_batch_size;
std::copy_n(static_cast<float *>(test_data[batch][0].data.data()) +
element_in_batch * 3 * 224 * 224,
3 * 224 * 224,
static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
element_in_batch,
1, static_cast<int64_t *>(labels.data.data()) + i);
}
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
(*warmup_data)[0] = std::move(images);
(*warmup_data)[1] = std::move(labels);
return warmup_data;
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
int32_t batch_size = FLAGS_batch_size) {
std::ifstream file(FLAGS_infer_data, std::ios::binary);
if (!file) {
FAIL() << "Couldn't open file: " << FLAGS_infer_data;
}
int64_t total_images{0};
file.read(reinterpret_cast<char *>(&total_images), sizeof(total_images));
LOG(INFO) << "Total images in file: " << total_images;
std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
std::vector<int> label_batch_shape{batch_size, 1};
auto labels_offset_in_file =
static_cast<size_t>(file.tellg()) +
sizeof(float) * total_images *
std::accumulate(image_batch_shape.begin() + 1,
image_batch_shape.end(), 1, std::multiplies<int>());
TensorReader<float> image_reader(file, 0, image_batch_shape, "input");
TensorReader<int64_t> label_reader(file, labels_offset_in_file,
label_batch_shape, "label");
auto iterations = total_images / batch_size;
if (FLAGS_iterations > 0 && FLAGS_iterations < iterations)
iterations = FLAGS_iterations;
for (auto i = 0; i < iterations; i++) {
auto images = image_reader.NextBatch();
auto labels = label_reader.NextBatch();
inputs->emplace_back(
std::vector<PaddleTensor>{std::move(images), std::move(labels)});
}
}
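As a quick check of the offset arithmetic in SetInput above, a minimal Python sketch with hypothetical numbers; the binary layout (one int64 image count, then all float32 images, then all int64 labels) matches the preprocessing script further below.
SIZE_INT64, SIZE_FLOAT32 = 8, 4
total_images = 100  # hypothetical image count read from the int64 header
labels_offset = SIZE_INT64 + SIZE_FLOAT32 * total_images * 3 * 224 * 224
print(labels_offset)  # 60211208 bytes: this is where the int64 labels begin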
TEST(Analyzer_int8_resnet50, quantization) {
AnalysisConfig cfg;
SetConfig(&cfg);
AnalysisConfig q_cfg;
SetConfig(&q_cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all, 100);
std::shared_ptr<std::vector<PaddleTensor>> warmup_data =
GetWarmupData(input_slots_all, 100);
q_cfg.EnableMkldnnQuantizer();
q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100);
CompareQuantizedAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
reinterpret_cast<const PaddlePredictor::Config *>(&q_cfg),
input_slots_all);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import numpy as np
import time
import sys
import random
import functools
import contextlib
from PIL import Image, ImageEnhance
import math
from paddle.dataset.common import download
random.seed(0)
np.random.seed(0)
DATA_DIM = 224
SIZE_FLOAT32 = 4
SIZE_INT64 = 8
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
    if center:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(img_path, mode, color_jitter, rotate):
img = Image.open(img_path)
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img
def download_unzip():
int8_download = 'int8/download'
target_name = 'data'
cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
int8_download)
target_folder = os.path.join(cache_folder, target_name)
data_urls = []
data_md5s = []
data_urls.append(
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa'
)
data_md5s.append('60f6525b0e1d127f345641d75d41f0a8')
data_urls.append(
'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
)
data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
file_names = []
for i in range(0, len(data_urls)):
download(data_urls[i], cache_folder, data_md5s[i])
file_names.append(data_urls[i].split('/')[-1])
zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz')
if not os.path.exists(zip_path):
cat_command = 'cat'
for file_name in file_names:
cat_command += ' ' + os.path.join(cache_folder, file_name)
cat_command += ' > ' + zip_path
os.system(cat_command)
        print('Data is downloaded at {0}\n'.format(zip_path))
if not os.path.exists(target_folder):
cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, zip_path)
os.system(cmd)
print('Data is unzipped at {0}\n'.format(target_folder))
data_dir = os.path.join(target_folder, 'ILSVRC2012')
print('ILSVRC2012 full val set at {0}\n'.format(data_dir))
return data_dir
def reader():
data_dir = download_unzip()
file_list = os.path.join(data_dir, 'val_list.txt')
output_file = os.path.join(data_dir, 'int8_full_val.bin')
with open(file_list) as flist:
lines = [line.strip() for line in flist]
num_images = len(lines)
if not os.path.exists(output_file):
print(
'Preprocessing to binary file...<num_images><all images><all labels>...\n'
)
with open(output_file, "w+b") as of:
#save num_images(int64_t) to file
of.seek(0)
num = np.array(int(num_images)).astype('int64')
of.write(num.tobytes())
for idx, line in enumerate(lines):
img_path, label = line.split()
img_path = os.path.join(data_dir, img_path)
if not os.path.exists(img_path):
continue
#save image(float32) to file
img = process_image(
img_path, 'val', color_jitter=False, rotate=False)
np_img = np.array(img)
of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
* idx)
of.write(np_img.astype('float32').tobytes())
#save label(int64_t) to file
                label_int = int(label)
np_label = np.array(label_int)
of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
* num_images + idx * SIZE_INT64)
of.write(np_label.astype('int64').tobytes())
print('The preprocessed binary file path {}\n'.format(output_file))
if __name__ == '__main__':
reader()
...@@ -50,6 +50,7 @@ DEFINE_bool(use_analysis, true, ...@@ -50,6 +50,7 @@ DEFINE_bool(use_analysis, true,
DEFINE_bool(record_benchmark, false, DEFINE_bool(record_benchmark, false,
"Record benchmark after profiling the model"); "Record benchmark after profiling the model");
DEFINE_double(accuracy, 1e-3, "Result Accuracy."); DEFINE_double(accuracy, 1e-3, "Result Accuracy.");
DEFINE_double(quantized_accuracy, 1e-2, "Result Quantized Accuracy.");
DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch."); DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch.");
DECLARE_bool(profile); DECLARE_bool(profile);
...@@ -58,6 +59,19 @@ DECLARE_int32(paddle_num_threads); ...@@ -58,6 +59,19 @@ DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace inference { namespace inference {
template <typename T>
constexpr paddle::PaddleDType GetPaddleDType();
template <>
constexpr paddle::PaddleDType GetPaddleDType<int64_t>() {
return paddle::PaddleDType::INT64;
}
template <>
constexpr paddle::PaddleDType GetPaddleDType<float>() {
return paddle::PaddleDType::FLOAT32;
}
void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) { void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) {
const auto *analysis_config = const auto *analysis_config =
reinterpret_cast<const AnalysisConfig *>(config); reinterpret_cast<const AnalysisConfig *>(config);
...@@ -392,6 +406,32 @@ void TestPrediction(const PaddlePredictor::Config *config, ...@@ -392,6 +406,32 @@ void TestPrediction(const PaddlePredictor::Config *config,
} }
} }
void CompareTopAccuracy(const std::vector<PaddleTensor> &output_slots1,
const std::vector<PaddleTensor> &output_slots2) {
// first output: avg_cost
if (output_slots1.size() == 0 || output_slots2.size() == 0)
throw std::invalid_argument(
"CompareTopAccuracy: output_slots vector is empty.");
PADDLE_ENFORCE(output_slots1.size() >= 2UL);
PADDLE_ENFORCE(output_slots2.size() >= 2UL);
// second output: acc_top1
if (output_slots1[1].lod.size() > 0 || output_slots2[1].lod.size() > 0)
throw std::invalid_argument(
"CompareTopAccuracy: top1 accuracy output has nonempty LoD.");
if (output_slots1[1].dtype != paddle::PaddleDType::FLOAT32 ||
output_slots2[1].dtype != paddle::PaddleDType::FLOAT32)
throw std::invalid_argument(
"CompareTopAccuracy: top1 accuracy output is of a wrong type.");
float *top1_quantized = static_cast<float *>(output_slots1[1].data.data());
float *top1_reference = static_cast<float *>(output_slots2[1].data.data());
LOG(INFO) << "top1 INT8 accuracy: " << *top1_quantized;
LOG(INFO) << "top1 FP32 accuracy: " << *top1_reference;
LOG(INFO) << "Accepted accuracy drop threshold: " << FLAGS_quantized_accuracy;
CHECK_LE(std::abs(*top1_quantized - *top1_reference),
FLAGS_quantized_accuracy);
}
void CompareDeterministic( void CompareDeterministic(
const PaddlePredictor::Config *config, const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs) { const std::vector<std::vector<PaddleTensor>> &inputs) {
...@@ -421,6 +461,17 @@ void CompareNativeAndAnalysis( ...@@ -421,6 +461,17 @@ void CompareNativeAndAnalysis(
CompareResult(analysis_outputs, native_outputs); CompareResult(analysis_outputs, native_outputs);
} }
void CompareQuantizedAndAnalysis(
const PaddlePredictor::Config *config,
const PaddlePredictor::Config *qconfig,
const std::vector<std::vector<PaddleTensor>> &inputs) {
PrintConfig(config, true);
std::vector<PaddleTensor> analysis_outputs, quantized_outputs;
TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
TestOneThreadPrediction(qconfig, inputs, &quantized_outputs, true);
CompareTopAccuracy(quantized_outputs, analysis_outputs);
}
void CompareNativeAndAnalysis( void CompareNativeAndAnalysis(
PaddlePredictor *native_pred, PaddlePredictor *analysis_pred, PaddlePredictor *native_pred, PaddlePredictor *analysis_pred,
const std::vector<std::vector<PaddleTensor>> &inputs) { const std::vector<std::vector<PaddleTensor>> &inputs) {
......
...@@ -23,6 +23,16 @@ limitations under the License. */ ...@@ -23,6 +23,16 @@ limitations under the License. */
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
// Whether to use the CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode in batch_norm.
// This mode can be faster in some tasks because an optimized path may be
// selected for the CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types on devices
// with compute capability 6.0 or higher. The flag defaults to false because
// this mode may use a scaled atomic integer reduction, which can cause a
// numerical overflow for certain input data ranges.
DEFINE_bool(cudnn_batchnorm_spatial_persistent, false,
            "Whether to enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn "
            "batch_norm, default is False.");
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -76,7 +86,11 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -76,7 +86,11 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
} }
epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; if (FLAGS_cudnn_batchnorm_spatial_persistent) {
mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
} else {
mode_ = CUDNN_BATCHNORM_SPATIAL;
}
#else #else
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
...@@ -302,7 +316,11 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T> ...@@ -302,7 +316,11 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
} }
epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
#if CUDNN_VERSION_MIN(7, 0, 0) #if CUDNN_VERSION_MIN(7, 0, 0)
mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; if (FLAGS_cudnn_batchnorm_spatial_persistent) {
mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
} else {
mode_ = CUDNN_BATCHNORM_SPATIAL;
}
#else #else
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
......
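For reference, a minimal sketch of how the new cudnn_batchnorm_spatial_persistent flag could be switched on from Python, assuming it is exposed through the FLAGS_* environment-variable mechanism that paddle.fluid reads at import time (whether this particular flag is whitelisted may differ between releases).
import os
# Assumption: FLAGS_* environment variables are picked up when paddle.fluid is imported.
os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = 'true'
import paddle.fluid as fluid  # the persistent cuDNN batch-norm mode is now requested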
...@@ -386,7 +386,7 @@ void BenchKernelSoftmax() { ...@@ -386,7 +386,7 @@ void BenchKernelSoftmax() {
RandomVec<T>(bs * n, x.mutable_data<T>(PlaceType()), -2.f, 2.f); RandomVec<T>(bs * n, x.mutable_data<T>(PlaceType()), -2.f, 2.f);
const T* x_data = x.data<T>(); const T* x_data = x.data<T>();
T* y_data = y.mutable_data<T>(PlaceType()); T* y_data = y.mutable_data<T>(PlaceType());
BenchAllImpls<KernelTuple, PlaceType>(n, x_data, y_data, n, bs); BenchAllImpls<KernelTuple, PlaceType>(n, x_data, y_data, n, bs, 1);
} }
} }
} }
......
...@@ -34,6 +34,7 @@ const char* to_string(KernelType kt) { ...@@ -34,6 +34,7 @@ const char* to_string(KernelType kt) {
ONE_CASE(kVAddRelu); ONE_CASE(kVAddRelu);
ONE_CASE(kVSub); ONE_CASE(kVSub);
ONE_CASE(kVScal); ONE_CASE(kVScal);
ONE_CASE(kStrideScal);
ONE_CASE(kVAddBias); ONE_CASE(kVAddBias);
ONE_CASE(kVRelu); ONE_CASE(kVRelu);
ONE_CASE(kVBroadcast); ONE_CASE(kVBroadcast);
...@@ -55,6 +56,7 @@ const char* to_string(KernelType kt) { ...@@ -55,6 +56,7 @@ const char* to_string(KernelType kt) {
ONE_CASE(kMatMul); ONE_CASE(kMatMul);
ONE_CASE(kHMax); ONE_CASE(kHMax);
ONE_CASE(kHSum); ONE_CASE(kHSum);
ONE_CASE(kStrideASum);
ONE_CASE(kSoftmax); ONE_CASE(kSoftmax);
ONE_CASE(kEmbSeqPool); ONE_CASE(kEmbSeqPool);
ONE_CASE(kSgd); ONE_CASE(kSgd);
......
...@@ -38,6 +38,8 @@ typedef enum { ...@@ -38,6 +38,8 @@ typedef enum {
kNCHW16CMulNC, kNCHW16CMulNC,
kSeqPool, kSeqPool,
kSoftmax, kSoftmax,
kStrideASum,
kStrideScal,
kVAdd, kVAdd,
kVAddBias, kVAddBias,
kVAddRelu, kVAddRelu,
...@@ -74,6 +76,14 @@ struct XYZNTuple { ...@@ -74,6 +76,14 @@ struct XYZNTuple {
template <typename T> template <typename T>
struct AXYNTuple : public XYZNTuple<T> {}; struct AXYNTuple : public XYZNTuple<T> {};
// a, x, y, n, stride
template <typename T>
struct AXYNSTuple {
typedef T data_type;
typedef int attr_type;
typedef void (*func_type)(const T*, const T*, T*, int, int);
};
// x, y, n // x, y, n
template <typename T> template <typename T>
struct XYNTuple { struct XYNTuple {
...@@ -86,6 +96,14 @@ struct XYNTuple { ...@@ -86,6 +96,14 @@ struct XYNTuple {
template <typename T> template <typename T>
struct XRNTuple : public XYNTuple<T> {}; struct XRNTuple : public XYNTuple<T> {};
// x, returned value, n, stride
template <typename T>
struct XRNSTuple {
typedef T data_type;
typedef int attr_type;
typedef void (*func_type)(const T*, T*, int, int);
};
#define DECLARE_KERNELTUPLE(kernel_tuple, type) \ #define DECLARE_KERNELTUPLE(kernel_tuple, type) \
template <typename T> \ template <typename T> \
struct type##Tuple : public kernel_tuple<T> { \ struct type##Tuple : public kernel_tuple<T> { \
...@@ -101,6 +119,8 @@ DECLARE_KERNELTUPLE(XYZNTuple, VSub); ...@@ -101,6 +119,8 @@ DECLARE_KERNELTUPLE(XYZNTuple, VSub);
DECLARE_KERNELTUPLE(AXYNTuple, VScal); DECLARE_KERNELTUPLE(AXYNTuple, VScal);
DECLARE_KERNELTUPLE(AXYNTuple, VAddBias); DECLARE_KERNELTUPLE(AXYNTuple, VAddBias);
DECLARE_KERNELTUPLE(AXYNSTuple, StrideScal);
DECLARE_KERNELTUPLE(XYNTuple, VRelu); DECLARE_KERNELTUPLE(XYNTuple, VRelu);
DECLARE_KERNELTUPLE(XYNTuple, VIdentity); DECLARE_KERNELTUPLE(XYNTuple, VIdentity);
DECLARE_KERNELTUPLE(XYNTuple, VSquare); DECLARE_KERNELTUPLE(XYNTuple, VSquare);
...@@ -112,6 +132,8 @@ DECLARE_KERNELTUPLE(XYNTuple, VCopy); ...@@ -112,6 +132,8 @@ DECLARE_KERNELTUPLE(XYNTuple, VCopy);
DECLARE_KERNELTUPLE(XRNTuple, HMax); DECLARE_KERNELTUPLE(XRNTuple, HMax);
DECLARE_KERNELTUPLE(XRNTuple, HSum); DECLARE_KERNELTUPLE(XRNTuple, HSum);
DECLARE_KERNELTUPLE(XRNSTuple, StrideASum);
typedef struct { typedef struct {
void* gates; // gates: x_ch, x_ih, x_fh, x_oh void* gates; // gates: x_ch, x_ih, x_fh, x_oh
const void* ct_1; const void* ct_1;
...@@ -285,7 +307,7 @@ struct SoftmaxTuple { ...@@ -285,7 +307,7 @@ struct SoftmaxTuple {
static constexpr KernelType kernel_type = kSoftmax; static constexpr KernelType kernel_type = kSoftmax;
typedef T data_type; typedef T data_type;
typedef int attr_type; typedef int attr_type;
typedef void (*func_type)(const T*, T*, int, int); typedef void (*func_type)(const T*, T*, int, int, int);
}; };
// nChw16c = nChw16c .* NC // nChw16c = nChw16c .* NC
......
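To make the two strided tuple signatures above concrete, here is a small NumPy sketch of the intended semantics (an illustration only, not the jit kernels themselves): StrideASum accumulates absolute values over every stride-th element, and StrideScal scales only those elements while leaving the rest untouched.
import numpy as np

x = np.arange(-3, 3, dtype='float32')    # n = 6
stride = 2
a = np.float32(0.5)
stride_asum = np.abs(x[::stride]).sum()  # StrideASum over x[0], x[2], x[4]
y = x.copy()
y[::stride] *= a                         # StrideScal with x == y (in-place form)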
...@@ -50,10 +50,15 @@ void VTanh(const T* x, T* y, int n) { ...@@ -50,10 +50,15 @@ void VTanh(const T* x, T* y, int n) {
compute_addbias(&b, y, y, n); compute_addbias(&b, y, y, n);
} }
void Softmax(const T* x, T* y, int n, int bs) { // remain is the product of dimension shapes after the axis dimension
void Softmax(const T* x, T* y, int n, int bs, int remain) {
auto compute_hmax = KernelFuncs<HMaxTuple<T>, CPUPlace>::Cache().At(n); auto compute_hmax = KernelFuncs<HMaxTuple<T>, CPUPlace>::Cache().At(n);
auto compute_hsum = KernelFuncs<HSumTuple<T>, CPUPlace>::Cache().At(n); auto compute_hsum = KernelFuncs<HSumTuple<T>, CPUPlace>::Cache().At(n);
auto compute_vscal = KernelFuncs<VScalTuple<T>, CPUPlace>::Cache().At(n); auto compute_vscal = KernelFuncs<VScalTuple<T>, CPUPlace>::Cache().At(n);
auto compute_strideasum =
KernelFuncs<StrideASumTuple<T>, CPUPlace>::Cache().At(n);
auto compute_stridescal =
KernelFuncs<StrideScalTuple<T>, CPUPlace>::Cache().At(n);
auto compute_vaddbias = auto compute_vaddbias =
KernelFuncs<VAddBiasTuple<T>, CPUPlace>::Cache().At(n); KernelFuncs<VAddBiasTuple<T>, CPUPlace>::Cache().At(n);
auto compute_vexp = KernelFuncs<VExpTuple<T>, CPUPlace>::Cache().At(n); auto compute_vexp = KernelFuncs<VExpTuple<T>, CPUPlace>::Cache().At(n);
...@@ -64,9 +69,17 @@ void Softmax(const T* x, T* y, int n, int bs) { ...@@ -64,9 +69,17 @@ void Softmax(const T* x, T* y, int n, int bs) {
scalar = static_cast<T>(0) - scalar; scalar = static_cast<T>(0) - scalar;
compute_vaddbias(&scalar, x, y, n); // x - max compute_vaddbias(&scalar, x, y, n); // x - max
compute_vexp(y, y, n); compute_vexp(y, y, n);
compute_hsum(y, &scalar, n); if (remain == 1) {
scalar = static_cast<T>(1) / scalar; compute_hsum(y, &scalar, n);
compute_vscal(&scalar, y, y, n); scalar = static_cast<T>(1) / scalar;
compute_vscal(&scalar, y, y, n);
} else {
for (int j = 0; j < remain; ++j) {
compute_strideasum(&y[j], &scalar, n, remain);
scalar = static_cast<T>(1) / scalar;
compute_stridescal(&scalar, &y[j], &y[j], n, remain);
}
}
x += n; x += n;
y += n; y += n;
} }
......
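The remain argument introduced above is easiest to see with a shaped example. The following NumPy sketch (an illustration of the semantics, not Paddle code) computes the axis-wise softmax that the strided path implements for a [2, 3, 4] input with the softmax axis at dimension 1, i.e. bs = 2, n = 3 * 4 = 12 and remain = 4.
import numpy as np

x = np.random.rand(2, 3, 4).astype('float32')
axis = 1                              # axis_dim = 3, remain = 4, bs = 2
y = np.exp(x - x.max(axis=axis, keepdims=True))
y /= y.sum(axis=axis, keepdims=True)  # per-column StrideASum followed by StrideScal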
...@@ -26,7 +26,7 @@ using T = float; ...@@ -26,7 +26,7 @@ using T = float;
void VSigmoid(const T* x, T* y, int n); void VSigmoid(const T* x, T* y, int n);
void VTanh(const T* x, T* y, int n); void VTanh(const T* x, T* y, int n);
void Softmax(const T* x, T* y, int n, int bs); void Softmax(const T* x, T* y, int n, int bs, int remain);
void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr); void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr);
void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr); void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr);
......
...@@ -7,6 +7,7 @@ USE_JITKERNEL_MORE(kMatMul, mkl) ...@@ -7,6 +7,7 @@ USE_JITKERNEL_MORE(kMatMul, mkl)
USE_JITKERNEL_MORE(kVMul, mkl) USE_JITKERNEL_MORE(kVMul, mkl)
USE_JITKERNEL_MORE(kVAdd, mkl) USE_JITKERNEL_MORE(kVAdd, mkl)
USE_JITKERNEL_MORE(kVScal, mkl) USE_JITKERNEL_MORE(kVScal, mkl)
USE_JITKERNEL_MORE(kStrideScal, mkl)
USE_JITKERNEL_MORE(kVExp, mkl) USE_JITKERNEL_MORE(kVExp, mkl)
USE_JITKERNEL_MORE(kVSquare, mkl) USE_JITKERNEL_MORE(kVSquare, mkl)
USE_JITKERNEL_MORE(kVCopy, mkl) USE_JITKERNEL_MORE(kVCopy, mkl)
......
...@@ -78,6 +78,26 @@ void VScal<double>(const double* a, const double* x, double* y, int n) { ...@@ -78,6 +78,26 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
} }
} }
template <>
void StrideScal<float>(const float* a, const float* x, float* y, int n,
int stride) {
if (x == y) {
platform::dynload::cblas_sscal(n / stride, *a, y, stride);
} else {
refer::StrideScal<float>(a, x, y, n, stride);
}
}
template <>
void StrideScal<double>(const double* a, const double* x, double* y, int n,
int stride) {
if (x == y) {
platform::dynload::cblas_dscal(n / stride, *a, y, stride);
} else {
refer::StrideScal<double>(a, x, y, n, stride);
}
}
template <> template <>
void VExp<float>(const float* x, float* y, int n) { void VExp<float>(const float* x, float* y, int n) {
platform::dynload::vsExp(n, x, y); platform::dynload::vsExp(n, x, y);
...@@ -128,6 +148,16 @@ void ASum<double>(const double* x, double* res, int n) { ...@@ -128,6 +148,16 @@ void ASum<double>(const double* x, double* res, int n) {
res[0] = platform::dynload::cblas_dasum(n, x, 1); res[0] = platform::dynload::cblas_dasum(n, x, 1);
} }
template <>
void StrideASum<float>(const float* x, float* res, int n, int stride) {
res[0] = platform::dynload::cblas_sasum(n / stride, x, stride);
}
template <>
void StrideASum<double>(const double* x, double* res, int n, int stride) {
res[0] = platform::dynload::cblas_dasum(n / stride, x, stride);
}
// TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
template <> template <>
bool VMulKernel<float>::CanBeUsed(const int& d) const { bool VMulKernel<float>::CanBeUsed(const int& d) const {
...@@ -144,6 +174,11 @@ bool VScalKernel<float>::CanBeUsed(const int& d) const { ...@@ -144,6 +174,11 @@ bool VScalKernel<float>::CanBeUsed(const int& d) const {
return platform::MayIUse(platform::avx512f) && d > 512; return platform::MayIUse(platform::avx512f) && d > 512;
} }
template <>
bool StrideScalKernel<float>::CanBeUsed(const int& d) const {
return true;
}
template <> template <>
bool VExpKernel<float>::CanBeUsed(const int& d) const { bool VExpKernel<float>::CanBeUsed(const int& d) const {
return d > 7; return d > 7;
...@@ -235,6 +270,7 @@ bool SoftmaxKernel<float>::CanBeUsed(const int& d) const { ...@@ -235,6 +270,7 @@ bool SoftmaxKernel<float>::CanBeUsed(const int& d) const {
AWALYS_USE_ME_WITH_DOUBLE(VMul); AWALYS_USE_ME_WITH_DOUBLE(VMul);
AWALYS_USE_ME_WITH_DOUBLE(VAdd); AWALYS_USE_ME_WITH_DOUBLE(VAdd);
AWALYS_USE_ME_WITH_DOUBLE(VScal); AWALYS_USE_ME_WITH_DOUBLE(VScal);
AWALYS_USE_ME_WITH_DOUBLE(StrideScal);
AWALYS_USE_ME_WITH_DOUBLE(VExp); AWALYS_USE_ME_WITH_DOUBLE(VExp);
AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
AWALYS_USE_ME_WITH_DOUBLE(VTanh); AWALYS_USE_ME_WITH_DOUBLE(VTanh);
...@@ -259,6 +295,7 @@ REGISTER_MKL_KERNEL(MatMul); ...@@ -259,6 +295,7 @@ REGISTER_MKL_KERNEL(MatMul);
REGISTER_MKL_KERNEL(VMul); REGISTER_MKL_KERNEL(VMul);
REGISTER_MKL_KERNEL(VAdd); REGISTER_MKL_KERNEL(VAdd);
REGISTER_MKL_KERNEL(VScal); REGISTER_MKL_KERNEL(VScal);
REGISTER_MKL_KERNEL(StrideScal);
REGISTER_MKL_KERNEL(VExp); REGISTER_MKL_KERNEL(VExp);
REGISTER_MKL_KERNEL(VSquare); REGISTER_MKL_KERNEL(VSquare);
REGISTER_MKL_KERNEL(VCopy); REGISTER_MKL_KERNEL(VCopy);
......
...@@ -129,7 +129,14 @@ template <typename T> ...@@ -129,7 +129,14 @@ template <typename T>
void ASum(const T* x, T* res, int n); void ASum(const T* x, T* res, int n);
template <typename T> template <typename T>
void Softmax(const T* x, T* y, int n, int bs) { void StrideASum(const T* x, T* res, int n, int stride);
template <typename T>
void StrideScal(const T* a, const T* x, T* y, int n, int stride);
// remain is the product of dimension shapes after the axis dimension
template <typename T>
void Softmax(const T* x, T* y, int n, int bs, int remain = 1) {
std::vector<T> entities(bs); std::vector<T> entities(bs);
for (int i = 0; i < bs; ++i) { for (int i = 0; i < bs; ++i) {
entities[i] = x[i * n]; entities[i] = x[i * n];
...@@ -143,9 +150,17 @@ void Softmax(const T* x, T* y, int n, int bs) { ...@@ -143,9 +150,17 @@ void Softmax(const T* x, T* y, int n, int bs) {
VExp(y, y, n * bs); VExp(y, y, n * bs);
for (int i = 0; i < bs; ++i) { for (int i = 0; i < bs; ++i) {
T sum; T sum;
ASum(&y[i * n], &sum, n); if (remain == 1) {
sum = static_cast<T>(1) / sum; ASum(&y[i * n], &sum, n);
VScal(&sum, &y[i * n], &y[i * n], n); sum = static_cast<T>(1) / sum;
VScal(&sum, &y[i * n], &y[i * n], n);
} else {
for (int j = 0; j < remain; ++j) {
StrideASum(&y[i * n + j], &sum, n, remain);
sum = static_cast<T>(1) / sum;
StrideScal(&sum, &y[i * n + j], &y[i * n + j], n, remain);
}
}
} }
} }
...@@ -193,6 +208,7 @@ DECLARE_MKL_KERNEL(VAdd); ...@@ -193,6 +208,7 @@ DECLARE_MKL_KERNEL(VAdd);
// AXYN // AXYN
DECLARE_MKL_KERNEL(VScal); DECLARE_MKL_KERNEL(VScal);
DECLARE_MKL_KERNEL(StrideScal);
// XYN // XYN
DECLARE_MKL_KERNEL(VExp); DECLARE_MKL_KERNEL(VExp);
......
...@@ -12,6 +12,7 @@ USE_JITKERNEL_REFER(kVAdd) ...@@ -12,6 +12,7 @@ USE_JITKERNEL_REFER(kVAdd)
USE_JITKERNEL_REFER(kVAddRelu) USE_JITKERNEL_REFER(kVAddRelu)
USE_JITKERNEL_REFER(kVSub) USE_JITKERNEL_REFER(kVSub)
USE_JITKERNEL_REFER(kVScal) USE_JITKERNEL_REFER(kVScal)
USE_JITKERNEL_REFER(kStrideScal)
USE_JITKERNEL_REFER(kVAddBias) USE_JITKERNEL_REFER(kVAddBias)
USE_JITKERNEL_REFER(kVCopy) USE_JITKERNEL_REFER(kVCopy)
USE_JITKERNEL_REFER(kVRelu) USE_JITKERNEL_REFER(kVRelu)
...@@ -32,6 +33,7 @@ USE_JITKERNEL_REFER(kMatMul) ...@@ -32,6 +33,7 @@ USE_JITKERNEL_REFER(kMatMul)
USE_JITKERNEL_REFER(kVSquare) USE_JITKERNEL_REFER(kVSquare)
USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHSum)
USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kHMax)
USE_JITKERNEL_REFER(kStrideASum)
USE_JITKERNEL_REFER(kSoftmax) USE_JITKERNEL_REFER(kSoftmax)
USE_JITKERNEL_REFER(kEmbSeqPool) USE_JITKERNEL_REFER(kEmbSeqPool)
USE_JITKERNEL_REFER(kSgd) USE_JITKERNEL_REFER(kSgd)
......
...@@ -27,6 +27,7 @@ REGISTER_REFER_KERNEL(VAddRelu); ...@@ -27,6 +27,7 @@ REGISTER_REFER_KERNEL(VAddRelu);
REGISTER_REFER_KERNEL(VSub); REGISTER_REFER_KERNEL(VSub);
REGISTER_REFER_KERNEL(VScal); REGISTER_REFER_KERNEL(VScal);
REGISTER_REFER_KERNEL(StrideScal);
REGISTER_REFER_KERNEL(VAddBias); REGISTER_REFER_KERNEL(VAddBias);
REGISTER_REFER_KERNEL(VRelu); REGISTER_REFER_KERNEL(VRelu);
...@@ -51,6 +52,7 @@ REGISTER_REFER_KERNEL(SeqPool); ...@@ -51,6 +52,7 @@ REGISTER_REFER_KERNEL(SeqPool);
REGISTER_REFER_KERNEL(MatMul); REGISTER_REFER_KERNEL(MatMul);
REGISTER_REFER_KERNEL(HMax); REGISTER_REFER_KERNEL(HMax);
REGISTER_REFER_KERNEL(HSum); REGISTER_REFER_KERNEL(HSum);
REGISTER_REFER_KERNEL(StrideASum);
REGISTER_REFER_KERNEL(Softmax); REGISTER_REFER_KERNEL(Softmax);
REGISTER_REFER_KERNEL(EmbSeqPool); REGISTER_REFER_KERNEL(EmbSeqPool);
REGISTER_REFER_KERNEL(Sgd); REGISTER_REFER_KERNEL(Sgd);
......
...@@ -411,19 +411,47 @@ void HSum(const T* x, T* res, int n) { ...@@ -411,19 +411,47 @@ void HSum(const T* x, T* res, int n) {
} }
} }
template <typename T>
void StrideASum(const T* x, T* res, int n, int stride) {
res[0] = std::abs(x[0]);  // absolute sum, consistent with the MKL cblas_?asum path
for (int i = stride; i < n; i += stride) {
res[0] += std::abs(x[i]);
}
}
template <typename T>
void StrideScal(const T* a, const T* x, T* y, int n, int stride) {
for (int i = 0; i < n; ++i) {
if (i % stride == 0) {
y[i] = x[i] * a[0];
} else {
y[i] = x[i];
}
}
}
// y = e^(x - max(x)) // y = e^(x - max(x))
// y = y / sum(y) // y = y / sum(y)
// remain is the product of dimension shapes after the axis dimension
template <typename T> template <typename T>
void Softmax(const T* x, T* y, int n, int bs = 1) { void Softmax(const T* x, T* y, int n, int bs = 1, int remain = 1) {
for (int i = 0; i < bs; ++i) { for (int i = 0; i < bs; ++i) {
T scalar; T scalar;
HMax(x, &scalar, n); HMax(x, &scalar, n);
scalar = static_cast<T>(0) - scalar; scalar = static_cast<T>(0) - scalar;
VAddBias(&scalar, x, y, n); // x - max VAddBias(&scalar, x, y, n); // x - max
VExp(y, y, n); VExp(y, y, n);
HSum(y, &scalar, n); if (remain == 1) {
scalar = static_cast<T>(1) / scalar; HSum(y, &scalar, n);
VScal(&scalar, y, y, n); scalar = static_cast<T>(1) / scalar;
VScal(&scalar, y, y, n);
} else {
for (int j = 0; j < remain; j++) {
StrideASum(&y[j], &scalar, n, remain);
scalar = static_cast<T>(1) / scalar;
StrideScal(&scalar, &y[j], &y[j], n, remain);
}
}
x += n; x += n;
y += n; y += n;
} }
...@@ -507,6 +535,9 @@ DECLARE_REFER_KERNEL(VSub); ...@@ -507,6 +535,9 @@ DECLARE_REFER_KERNEL(VSub);
DECLARE_REFER_KERNEL(VScal); DECLARE_REFER_KERNEL(VScal);
DECLARE_REFER_KERNEL(VAddBias); DECLARE_REFER_KERNEL(VAddBias);
// const T* a, const T* x, T* y, int n, int stride
DECLARE_REFER_KERNEL(StrideScal);
// const T* x, T* y, int n // const T* x, T* y, int n
DECLARE_REFER_KERNEL(VRelu); DECLARE_REFER_KERNEL(VRelu);
DECLARE_REFER_KERNEL(VIdentity); DECLARE_REFER_KERNEL(VIdentity);
...@@ -528,6 +559,8 @@ DECLARE_REFER_KERNEL(GRUHtPart2); ...@@ -528,6 +559,8 @@ DECLARE_REFER_KERNEL(GRUHtPart2);
DECLARE_REFER_KERNEL(HMax); DECLARE_REFER_KERNEL(HMax);
DECLARE_REFER_KERNEL(HSum); DECLARE_REFER_KERNEL(HSum);
DECLARE_REFER_KERNEL(StrideASum);
// others // others
DECLARE_REFER_KERNEL(CRFDecoding); DECLARE_REFER_KERNEL(CRFDecoding);
DECLARE_REFER_KERNEL(LayerNorm); DECLARE_REFER_KERNEL(LayerNorm);
......
...@@ -723,39 +723,122 @@ void TestKernelSoftmax() { ...@@ -723,39 +723,122 @@ void TestKernelSoftmax() {
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type); VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
for (int bs : {1, 2, 10}) { for (int bs : {1, 2, 10}) {
for (int n : TestSizes()) { for (int n : TestSizes()) {
for (int m : {1, 2, 3}) { // remain
if (m > n || n % m != 0) {
continue;
}
auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr);
std::vector<T> x(bs * n), y(bs * n);
RandomVec<T>(bs * n, x.data());
const T* x_data = x.data();
T* y_data = y.data();
std::vector<T> xinp(x.size()); // inplace test
std::copy(x.begin(), x.end(), xinp.begin());
ref(x_data, y_data, n, bs, m);
T* xinp_data = xinp.data();
ref(xinp_data, xinp_data, n, bs, m);
ExpectEQ<T>(xinp_data, y_data, n * bs);
auto verifier = [](const typename KernelTuple::func_type tgt,
const std::vector<T>& x, const std::vector<T>& yref,
int n, int bs, int m) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(yref.size(), x.size());
EXPECT_EQ(x.size(), static_cast<size_t>(n * bs));
const T* x_data = x.data();
const T* yref_data = yref.data();
std::vector<T> ytgt(n * bs);
T* ytgt_data = ytgt.data();
// test normal
tgt(x_data, ytgt_data, n, bs, m);
ExpectEQ<T>(ytgt_data, yref_data, n * bs);
// test inplace x
std::copy(x.begin(), x.end(), ytgt.begin());
tgt(ytgt_data, ytgt_data, n, bs, m);
ExpectEQ<T>(ytgt_data, yref_data, n * bs);
};
TestAllImpls<KernelTuple, PlaceType>(n, verifier, x, y, n, bs, m);
}
}
}
}
template <typename KernelTuple, typename PlaceType>
void TestKernelStrideASum() {
using T = typename KernelTuple::data_type;
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
for (int d : TestSizes()) {
for (int m : {1, 2, 3}) { // stride
if (m > d || d % m != 0) {
continue;
}
auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr);
std::vector<T> x(d);
RandomVec<T>(d, x.data());
T ref_res;
ref(x.data(), &ref_res, d, m);
auto verifier = [](const typename KernelTuple::func_type tgt,
const std::vector<T>& x, const T ref_res,
const int m) {
EXPECT_TRUE(tgt != nullptr);
T tgt_res;
tgt(x.data(), &tgt_res, x.size(), m);
ExpectEQ<T>(&tgt_res, &ref_res, 1);
};
TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, ref_res, m);
}
}
}
template <typename KernelTuple, typename PlaceType>
void TestKernelStrideScal() {
using T = typename KernelTuple::data_type;
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
for (int d : TestSizes()) {
for (int m : {1, 2, 3}) { // stride
if (m > d || d % m != 0) {
continue;
}
auto ref = jit::GetReferFunc<KernelTuple>(); auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr); EXPECT_TRUE(ref != nullptr);
std::vector<T> x(bs * n), y(bs * n);
RandomVec<T>(bs * n, x.data());
const T* x_data = x.data();
T* y_data = y.data();
std::vector<T> xinp(x.size()); // inplace test const T a = static_cast<T>(3);
std::vector<T> x(d), yref(d);
std::vector<T> xinp(d); // inplace test
RandomVec<T>(d, x.data());
std::copy(x.begin(), x.end(), xinp.begin()); std::copy(x.begin(), x.end(), xinp.begin());
ref(x_data, y_data, n, bs);
const T* x_data = x.data();
T* yref_data = yref.data();
T* xinp_data = xinp.data(); T* xinp_data = xinp.data();
ref(xinp_data, xinp_data, n, bs); // test refer code inplace
ExpectEQ<T>(xinp_data, y_data, n * bs); ref(&a, x_data, yref_data, d, m);
ref(&a, xinp_data, xinp_data, d, m);
ExpectEQ<T>(xinp_data, yref_data, d);
auto verifier = [](const typename KernelTuple::func_type tgt, auto verifier = [](const typename KernelTuple::func_type tgt, const T a,
const std::vector<T>& x, const std::vector<T>& yref, const std::vector<T>& x, const std::vector<T>& yref,
int n, int bs) { const int m) {
EXPECT_TRUE(tgt != nullptr); EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(yref.size(), x.size()); EXPECT_EQ(yref.size(), x.size());
EXPECT_EQ(x.size(), static_cast<size_t>(n * bs));
const T* x_data = x.data(); const T* x_data = x.data();
const T* yref_data = yref.data(); const T* yref_data = yref.data();
std::vector<T> ytgt(n * bs); const int d = yref.size();
std::vector<T> ytgt(d);
T* ytgt_data = ytgt.data(); T* ytgt_data = ytgt.data();
// test normal // test normal
tgt(x_data, ytgt_data, n, bs); tgt(&a, x_data, ytgt_data, d, m);
ExpectEQ<T>(ytgt_data, yref_data, n * bs); ExpectEQ<T>(ytgt_data, yref_data, d);
// test inplace x // test inplace x
std::copy(x.begin(), x.end(), ytgt.begin()); std::copy(x.begin(), x.end(), ytgt.begin());
tgt(ytgt_data, ytgt_data, n, bs); tgt(&a, ytgt_data, ytgt_data, d, m);
ExpectEQ<T>(ytgt_data, yref_data, n * bs); ExpectEQ<T>(ytgt_data, yref_data, d);
}; };
TestAllImpls<KernelTuple, PlaceType>(n, verifier, x, y, n, bs); TestAllImpls<KernelTuple, PlaceType>(d, verifier, a, x, yref, m);
} }
} }
} }
...@@ -912,7 +995,7 @@ TEST(JITKernel_pool, more) { ...@@ -912,7 +995,7 @@ TEST(JITKernel_pool, more) {
EXPECT_EQ(kers.size(), 10UL); EXPECT_EQ(kers.size(), 10UL);
#else #else
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
EXPECT_EQ(kers.size(), 21UL); EXPECT_EQ(kers.size(), 22UL);
#else #else
EXPECT_EQ(kers.size(), 8UL); EXPECT_EQ(kers.size(), 8UL);
#endif #endif
...@@ -921,7 +1004,7 @@ TEST(JITKernel_pool, more) { ...@@ -921,7 +1004,7 @@ TEST(JITKernel_pool, more) {
TEST(JITKernel_pool, refer) { TEST(JITKernel_pool, refer) {
const auto& kers = jit::ReferKernelPool::Instance().AllKernels(); const auto& kers = jit::ReferKernelPool::Instance().AllKernels();
EXPECT_EQ(kers.size(), 29UL); EXPECT_EQ(kers.size(), 31UL);
} }
// test helper // test helper
...@@ -1292,3 +1375,6 @@ TEST_CPU_KERNEL(MatMul); ...@@ -1292,3 +1375,6 @@ TEST_CPU_KERNEL(MatMul);
TEST_CPU_KERNEL(Softmax); TEST_CPU_KERNEL(Softmax);
TEST_CPU_KERNEL(Sgd); TEST_CPU_KERNEL(Sgd);
TEST_CPU_KERNEL(VBroadcast); TEST_CPU_KERNEL(VBroadcast);
TEST_CPU_KERNEL(StrideASum);
TEST_CPU_KERNEL(StrideScal);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/kldiv_loss_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class KLDivLossOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of KLDivLossOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Target"),
"Input(Target) of KLDivLossOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Loss"),
"Output(Loss) of KLDivLossOp should not be null.");
auto dim_x = ctx->GetInputDim("X");
auto dim_target = ctx->GetInputDim("Target");
PADDLE_ENFORCE_EQ(dim_x.size(), dim_target.size(),
"Input(X) rank and Input(Target) rank should be same.");
for (int i = 0; i < dim_x.size(); i++) {
PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i],
"Input(X) and Input(Target) should in same shape.");
}
auto reduction = ctx->Attrs().Get<std::string>("reduction");
PADDLE_ENFORCE(
"mean" == reduction || "sum" == reduction || "batchmean" == reduction ||
"none" == reduction,
"Attr(reduction) can only be 'none'|'batchmean'|'sum'|'mean'.");
if ("none" == reduction) {
ctx->SetOutputDim("Loss", dim_x);
} else {
ctx->SetOutputDim("Loss", {1});
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.GetPlace());
}
};
class KLDivLossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input tensor of KL divergence loss operator. "
"This is a tensor with shape of [N, *], where N is the "
"batch size, * means any number of additional dimensions.");
AddInput("Target",
"The tensor of KL divergence loss operator. "
"This is a tensor with shape of Input(X).");
AddOutput(
"Loss",
"The output KL divergence loss tensor. if Attr(reduction) is "
"'none', this tensor should be in same shape of of Input(X), else "
"this tensor should be in shape of [1].");
AddAttr<std::string>(
"reduction",
"The reduction type to apply to the output, available types "
"are 'none' | 'batchmean' | 'mean' | 'sum', 'none' for no "
"reduction, 'batchmean' for the sum of output divided by "
"batch size, 'mean' for the average value of all output, "
"'sum' for the sum of the output.")
.SetDefault("mean");
AddComment(R"DOC(
This operator calculates the Kullback-Leibler divergence loss
between Input(X) and Input(Target).
KL divergence loss is calculated as follows:
$$l(x, y) = y * (\log(y) - x)$$
where :math:`x` is Input(X) and :math:`y` is Input(Target).
When :attr:`reduction` is :attr:`none`, the output loss has
the same shape as Input(X); the loss at each point is calculated
separately and no reduction is applied.
When :attr:`reduction` is :attr:`mean`, the output loss has
shape [1] and its value is the mean of all point-wise losses.
When :attr:`reduction` is :attr:`sum`, the output loss has
shape [1] and its value is the sum of all point-wise losses.
When :attr:`reduction` is :attr:`batchmean`, the output loss has
shape [1] and its value is the sum of all point-wise losses
divided by the batch size.
)DOC");
}
};
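A short NumPy illustration of the loss formula and the 'batchmean' reduction documented above (plain NumPy, not the operator code; like the kernel, it treats non-positive targets as contributing zero loss).
import numpy as np

x = np.log(np.array([[0.2, 0.3, 0.5],
                     [0.1, 0.6, 0.3]], dtype='float32'))  # Input(X): log-probabilities
y = np.array([[0.1, 0.2, 0.7],
              [0.3, 0.3, 0.4]], dtype='float32')          # Input(Target)
point_loss = np.where(y > 0, y * (np.log(y) - x), 0.0)    # l(x, y) = y * (log(y) - x)
print(point_loss.sum() / x.shape[0])                      # 'batchmean' reduction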
class KLDivLossOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Target"), "Input(Target) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
"Input(Loss@GRAD) should not be null");
auto dim_x = ctx->GetInputDim("X");
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), dim_x);
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.GetPlace());
}
};
class KLDivLossOpGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType("kldiv_loss_grad");
op->SetInput("X", Input("X"));
op->SetInput("Target", Input("Target"));
op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
op->SetAttrMap(Attrs());
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
return std::unique_ptr<framework::OpDesc>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(kldiv_loss, ops::KLDivLossOp, ops::KLDivLossOpMaker,
ops::KLDivLossOpGradMaker);
REGISTER_OPERATOR(kldiv_loss_grad, ops::KLDivLossOpGrad);
REGISTER_OP_CPU_KERNEL(
kldiv_loss, ops::KLDivLossKernel<paddle::platform::CPUDeviceContext, float>,
ops::KLDivLossKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
kldiv_loss_grad,
ops::KLDivLossGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::KLDivLossGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/kldiv_loss_op.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
kldiv_loss,
ops::KLDivLossKernel<paddle::platform::CUDADeviceContext, float>,
ops::KLDivLossKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
kldiv_loss_grad,
ops::KLDivLossGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::KLDivLossGradKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/hostdevice.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
using Array1 = Eigen::DSizes<int64_t, 1>;
template <typename T>
struct KLDivLossForward {
HOSTDEVICE KLDivLossForward() {}
HOSTDEVICE T operator()(const T& target, const T& input) const {
if (target <= 0) {
return 0;
} else {
return target * (std::log(target) - input);
}
}
};
template <typename T>
struct KLDivLossBackward {
HOSTDEVICE KLDivLossBackward() {}
HOSTDEVICE T operator()(const T& target, const T& grad) const {
if (target <= 0) {
return 0;
} else {
return static_cast<T>(-1.) * grad;
}
}
};
template <typename DeviceContext, typename T>
class KLDivLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
auto* input = ctx.Input<Tensor>("X");
auto* target = ctx.Input<Tensor>("Target");
auto* loss = ctx.Output<Tensor>("Loss");
auto reduction = ctx.Attr<std::string>("reduction");
const int n = input->dims()[0];
loss->mutable_data<T>(ctx.GetPlace());
auto input_t = EigenVector<T>::Flatten(*input);
auto target_t = EigenVector<T>::Flatten(*target);
auto loss_t = EigenVector<T>::Flatten(*loss);
auto output = target_t.binaryExpr(input_t, KLDivLossForward<T>());
if ("none" == reduction) {
loss_t.device(place) = output;
} else if ("batchmean" == reduction) {
auto output_sum = output.sum().eval();
loss_t.device(place) = output_sum / output_sum.constant(n);
} else if ("mean" == reduction) {
loss_t.device(place) = output.mean();
} else if ("sum" == reduction) {
loss_t.device(place) = output.sum();
}
}
};
template <typename DeviceContext, typename T>
class KLDivLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
auto* target = ctx.Input<Tensor>("Target");
auto reduction = ctx.Attr<std::string>("reduction");
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
const int n = input_grad->dims()[0];
const int numel = input_grad->numel();
const int expand = numel / loss_grad->numel();
input_grad->mutable_data<T>(ctx.GetPlace());
auto target_t = EigenVector<T>::Flatten(*target);
auto input_grad_t = EigenVector<T>::Flatten(*input_grad);
auto loss_grad_t = EigenVector<T>::Flatten(*loss_grad);
auto loss_grad_expand = loss_grad_t.broadcast(Array1(expand));
auto grad_t = target_t * loss_grad_expand;
input_grad_t.device(place) =
target_t.binaryExpr(grad_t, KLDivLossBackward<T>());
if ("mean" == reduction) {
input_grad_t.device(place) = input_grad_t / static_cast<T>(numel);
} else if ("batchmean" == reduction) {
input_grad_t.device(place) = input_grad_t / static_cast<T>(n);
}
}
};
} // namespace operators
} // namespace paddle
...@@ -23,15 +23,16 @@ template <typename DeviceContext, typename T, bool is_test, ...@@ -23,15 +23,16 @@ template <typename DeviceContext, typename T, bool is_test,
typename Enable = void> typename Enable = void>
class SoftmaxFunctor { class SoftmaxFunctor {
public: public:
void operator()(const DeviceContext& context, const framework::Tensor* X, void operator()(const DeviceContext& context, const int axis_dim,
framework::Tensor* Y); const framework::Tensor* X, framework::Tensor* Y);
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class SoftmaxGradFunctor { class SoftmaxGradFunctor {
public: public:
void operator()(const DeviceContext& context, const framework::Tensor* y, void operator()(const DeviceContext& context, const int axis_dim,
const framework::Tensor* y_grad, framework::Tensor* x_grad); const framework::Tensor* y, const framework::Tensor* y_grad,
framework::Tensor* x_grad);
}; };
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
...@@ -36,8 +36,8 @@ struct ValueClip { ...@@ -36,8 +36,8 @@ struct ValueClip {
template <typename DeviceContext, typename T, bool is_test, typename Enable> template <typename DeviceContext, typename T, bool is_test, typename Enable>
void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()( void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
const DeviceContext& context, const framework::Tensor* X, const DeviceContext& context, const int axis_dim,
framework::Tensor* Y) { const framework::Tensor* X, framework::Tensor* Y) {
auto logits = EigenMatrix<T>::From(*X); auto logits = EigenMatrix<T>::From(*X);
auto softmax = EigenMatrix<T>::From(*Y); auto softmax = EigenMatrix<T>::From(*Y);
...@@ -46,10 +46,13 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()( ...@@ -46,10 +46,13 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
const int batch_size = logits.dimension(kBatchDim); const int batch_size = logits.dimension(kBatchDim);
const int num_classes = logits.dimension(kClassDim); const int num_classes = logits.dimension(kClassDim);
const int num_remain = num_classes / axis_dim;
Eigen::DSizes<int, 1> along_class(kClassDim); Eigen::DSizes<int, 1> along_class(kClassDim);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1); Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, num_classes); Eigen::DSizes<int, 2> one_by_class(1, num_classes);
Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
Eigen::DSizes<int, 2> one_axis(1, axis_dim);
auto shifted_logits = (logits - auto shifted_logits = (logits -
logits.maximum(along_class) logits.maximum(along_class)
...@@ -60,11 +63,11 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()( ...@@ -60,11 +63,11 @@ void SoftmaxFunctor<DeviceContext, T, is_test, Enable>::operator()(
softmax.device(*context.eigen_device()) = shifted_logits.exp(); softmax.device(*context.eigen_device()) = shifted_logits.exp();
softmax.device(*context.eigen_device()) = (softmax * softmax.device(*context.eigen_device()) = (softmax *
softmax.sum(along_class) softmax.reshape(batch_axis_remain)
.sum(along_class)
.inverse() .inverse()
.eval() .eval()
.reshape(batch_by_one) .broadcast(one_axis));
.broadcast(one_by_class));
} }
template <class DeviceContext> template <class DeviceContext>
...@@ -73,8 +76,8 @@ using enable_if_CPU = typename std::enable_if< ...@@ -73,8 +76,8 @@ using enable_if_CPU = typename std::enable_if<
template <typename DeviceContext> template <typename DeviceContext>
class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> { class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
void operator()(const DeviceContext& context, const framework::Tensor* X, void operator()(const DeviceContext& context, const int axis_dim,
framework::Tensor* Y) { const framework::Tensor* X, framework::Tensor* Y) {
auto in_dims = X->dims(); auto in_dims = X->dims();
const float* in_data = X->data<float>(); const float* in_data = X->data<float>();
float* out_data = Y->data<float>(); float* out_data = Y->data<float>();
...@@ -84,14 +87,16 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> { ...@@ -84,14 +87,16 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
auto compute_softmax = auto compute_softmax =
jit::KernelFuncs<jit::SoftmaxTuple<float>, platform::CPUPlace>::Cache() jit::KernelFuncs<jit::SoftmaxTuple<float>, platform::CPUPlace>::Cache()
.At(in_dims[kClassDim]); .At(in_dims[kClassDim]);
compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim]); compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim],
in_dims[kClassDim] / axis_dim);
} }
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void SoftmaxGradFunctor<DeviceContext, T>::operator()( void SoftmaxGradFunctor<DeviceContext, T>::operator()(
const DeviceContext& context, const framework::Tensor* y, const DeviceContext& context, const int axis_dim,
const framework::Tensor* y_grad, framework::Tensor* x_grad) { const framework::Tensor* y, const framework::Tensor* y_grad,
framework::Tensor* x_grad) {
auto softmax = EigenMatrix<T>::From(*y); auto softmax = EigenMatrix<T>::From(*y);
auto softmax_grad = EigenMatrix<T>::From(*y_grad); auto softmax_grad = EigenMatrix<T>::From(*y_grad);
auto logits_grad = EigenMatrix<T>::From(*x_grad); auto logits_grad = EigenMatrix<T>::From(*x_grad);
...@@ -101,16 +106,19 @@ void SoftmaxGradFunctor<DeviceContext, T>::operator()( ...@@ -101,16 +106,19 @@ void SoftmaxGradFunctor<DeviceContext, T>::operator()(
const int batch_size = softmax.dimension(kBatchDim); const int batch_size = softmax.dimension(kBatchDim);
const int num_classes = softmax.dimension(kClassDim); const int num_classes = softmax.dimension(kClassDim);
const int num_remain = num_classes / axis_dim;
Eigen::DSizes<int, 1> along_class(kClassDim); Eigen::DSizes<int, 1> along_class(kClassDim);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1); Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, num_classes); Eigen::DSizes<int, 2> one_by_class(1, num_classes);
Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
Eigen::DSizes<int, 2> one_axis(1, axis_dim);
auto dot = (softmax * softmax_grad) auto dot = (softmax * softmax_grad)
.reshape(batch_axis_remain)
.sum(along_class) .sum(along_class)
.eval() .eval()
.reshape(batch_by_one) .broadcast(one_axis);
.broadcast(one_by_class);
logits_grad.device(*context.eigen_device()) = (softmax_grad - dot) * softmax; logits_grad.device(*context.eigen_device()) = (softmax_grad - dot) * softmax;
} }
......
...@@ -39,6 +39,20 @@ class SoftmaxOp : public framework::OperatorWithKernel { ...@@ -39,6 +39,20 @@ class SoftmaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SoftmaxOp should not be null."); "Output(Out) of SoftmaxOp should not be null.");
auto dim_x = ctx->GetInputDim("X");
auto rank_x = dim_x.size();
auto axis = ctx->Attrs().Get<int>("axis");
PADDLE_ENFORCE(axis >= -rank_x && axis < rank_x,
"Attr(axis) value should be in range [-R, R-1], "
"R is the rank of Input(X).");
auto use_cudnn = ctx->Attrs().Get<bool>("use_cudnn");
auto use_mkldnn = ctx->Attrs().Get<bool>("use_mkldnn");
if (axis != rank_x - 1 && axis != -1) {
      PADDLE_ENFORCE(!use_cudnn, "CUDNN kernel only supports axis = -1.");
      PADDLE_ENFORCE(!use_mkldnn, "MKLDNN kernel only supports axis = -1.");
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
...@@ -80,8 +94,12 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -80,8 +94,12 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("X", AddInput("X",
"The input tensor of softmax, " "The input tensor of softmax, "
"whose last dimension is the input_feature_dimensions."); "whose dimension :attr:`axis` is the input_feature_dimensions.");
AddOutput("Out", "The normalized values with the same shape as X."); AddOutput("Out", "The normalized values with the same shape as X.");
AddAttr<int>("axis",
"The dimension index of Input(x) to perform softmax,"
"default -1 for last dimension")
.SetDefault(-1);
AddAttr<bool>( AddAttr<bool>(
"use_cudnn", "use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn") "(bool, default false) Only used in cudnn kernel, need install cudnn")
...@@ -106,12 +124,13 @@ Softmax Operator. ...@@ -106,12 +124,13 @@ Softmax Operator.
The input of the softmax operator is a tensor of any rank. The output tensor The input of the softmax operator is a tensor of any rank. The output tensor
has the same shape as the input. has the same shape as the input.
The input tensor will first be logically flattened to a 2-D matrix. The matrix's The dimension :attr:`axis` of the input tensor will be permuted to the last.
second dimension(row length) is as same as the last dimension of the input Then the input tensor will be logically flattened to a 2-D matrix. The matrix's
second dimension(row length) is the same as the dimension :attr:`axis` of the input
tensor, and the first dimension(column length) is the product of all other tensor, and the first dimension(column length) is the product of all other
dimensions of the input tensor. For each row of the matrix, the softmax operator dimensions of the input tensor. For each row of the matrix, the softmax operator
squashes the K-dimensional(K is the width of the matrix, which is also the size squashes the K-dimensional(K is the width of the matrix, which is also the size
of the input tensor's last dimension) vector of arbitrary real values to a of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a
K-dimensional vector of real values in the range [0, 1] that add up to 1. K-dimensional vector of real values in the range [0, 1] that add up to 1.
It computes the exponential of the given dimension and the sum of exponential It computes the exponential of the given dimension and the sum of exponential
values of all the other dimensions in the K-dimensional vector input. values of all the other dimensions in the K-dimensional vector input.
......
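An illustrative NumPy check of the axis semantics introduced above, mirroring the CanonicalAxis/SizeToAxis/SizeFromAxis helpers added in the header below: the input is viewed as an [n, d] matrix with n = prod(dims[:axis]) and d = prod(dims[axis:]), and each length-axis_dim slice along the chosen axis is normalized independently. This is a sketch, not Paddle code.
import numpy as np

def softmax_along_axis(x, axis=-1):
    axis = axis + x.ndim if axis < 0 else axis               # CanonicalAxis
    n = int(np.prod(x.shape[:axis], dtype=np.int64))         # SizeToAxis
    d = int(np.prod(x.shape[axis:], dtype=np.int64))         # SizeFromAxis
    axis_dim = x.shape[axis]
    x3d = x.reshape(n, d).reshape(n, axis_dim, d // axis_dim)
    shifted = x3d - x3d.max(axis=1, keepdims=True)           # subtract max for stability
    e = np.exp(shifted)
    return (e / e.sum(axis=1, keepdims=True)).reshape(x.shape)

x = np.random.rand(2, 3, 4).astype("float32")
ref = np.exp(x - x.max(axis=1, keepdims=True))
ref /= ref.sum(axis=1, keepdims=True)
assert np.allclose(softmax_along_axis(x, axis=1), ref, atol=1e-6)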
...@@ -20,6 +20,30 @@ namespace paddle { ...@@ -20,6 +20,30 @@ namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
using DDim = framework::DDim;
static inline int CanonicalAxis(const int axis, const int rank) {
if (axis < 0) {
return axis + rank;
}
return axis;
}
static inline int SizeToAxis(const int axis, DDim dims) {
int size = 1;
for (int i = 0; i < axis; i++) {
size *= dims[i];
}
return size;
}
static inline int SizeFromAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class SoftmaxKernel : public framework::OpKernel<T> { class SoftmaxKernel : public framework::OpKernel<T> {
...@@ -27,20 +51,27 @@ class SoftmaxKernel : public framework::OpKernel<T> { ...@@ -27,20 +51,27 @@ class SoftmaxKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<Tensor>("X"); auto* X = context.Input<Tensor>("X");
auto* Out = context.Output<Tensor>("Out"); auto* Out = context.Output<Tensor>("Out");
const int rank = X->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = X->dims()[axis];
// allocate memory on device. // allocate memory on device.
Out->mutable_data<T>(context.GetPlace()); Out->mutable_data<T>(context.GetPlace());
int rank = X->dims().size(); const int n = SizeToAxis(axis, X->dims());
Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1); const int d = SizeFromAxis(axis, X->dims());
Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); Tensor X_2d, Out_2d;
X_2d.ShareDataWith(*X).Resize({n, d});
Out_2d.ShareDataWith(*Out).Resize({n, d});
#ifdef PADDLE_ON_INFERENCE #ifdef PADDLE_ON_INFERENCE
math::SoftmaxFunctor<DeviceContext, T, true>()( math::SoftmaxFunctor<DeviceContext, T, true>()(
context.template device_context<DeviceContext>(), &X_2d, &Out_2d); context.template device_context<DeviceContext>(), axis_dim, &X_2d,
&Out_2d);
#else #else
math::SoftmaxFunctor<DeviceContext, T, false>()( math::SoftmaxFunctor<DeviceContext, T, false>()(
context.template device_context<DeviceContext>(), &X_2d, &Out_2d); context.template device_context<DeviceContext>(), axis_dim, &X_2d,
&Out_2d);
#endif #endif
} }
}; };
...@@ -52,18 +83,23 @@ class SoftmaxGradKernel : public framework::OpKernel<T> { ...@@ -52,18 +83,23 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
auto* Out = context.Input<Tensor>("Out"); auto* Out = context.Input<Tensor>("Out");
auto* dOut = context.Input<Tensor>(framework::GradVarName("Out")); auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dX = context.Output<Tensor>(framework::GradVarName("X")); auto* dX = context.Output<Tensor>(framework::GradVarName("X"));
const int rank = dX->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = dX->dims()[axis];
// allocate memory on device. // allocate memory on device.
dX->mutable_data<T>(context.GetPlace()); dX->mutable_data<T>(context.GetPlace());
int rank = Out->dims().size(); const int n = SizeToAxis(axis, dX->dims());
Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); const int d = SizeFromAxis(axis, dX->dims());
Tensor dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1); Tensor dX_2d, Out_2d, dOut_2d;
Tensor dX_2d = framework::ReshapeToMatrix(*dX, rank - 1); dX_2d.ShareDataWith(*dX).Resize({n, d});
Out_2d.ShareDataWith(*Out).Resize({n, d});
dOut_2d.ShareDataWith(*dOut).Resize({n, d});
math::SoftmaxGradFunctor<DeviceContext, T>()( math::SoftmaxGradFunctor<DeviceContext, T>()(
context.template device_context<DeviceContext>(), &Out_2d, &dOut_2d, context.template device_context<DeviceContext>(), axis_dim, &Out_2d,
&dX_2d); &dOut_2d, &dX_2d);
} }
}; };
......
...@@ -40,10 +40,12 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> { ...@@ -40,10 +40,12 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
softmax->mutable_data<T>(context.GetPlace()); softmax->mutable_data<T>(context.GetPlace());
loss->mutable_data<T>(context.GetPlace()); loss->mutable_data<T>(context.GetPlace());
int axis_dim = logits->dims()[logits->dims().size() - 1];
auto& dev_ctx = auto& dev_ctx =
context.template device_context<platform::CPUDeviceContext>(); context.template device_context<platform::CPUDeviceContext>();
math::SoftmaxFunctor<platform::CPUDeviceContext, T, false>()( math::SoftmaxFunctor<platform::CPUDeviceContext, T, false>()(
dev_ctx, logits, softmax); dev_ctx, axis_dim, logits, softmax);
math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()( math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()(
dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"), dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"),
context.Attr<int>("ignore_index")); context.Attr<int>("ignore_index"));
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/temporal_shift_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class TemporalShiftOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of TemporalShiftOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of TemporalShiftOp should not be null.");
auto dim_x = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(dim_x.size(), 4,
"Input(X) rank should be 4 in shape of [N*T, C, H, W].");
int seg_num = ctx->Attrs().Get<int>("seg_num");
float shift_ratio = ctx->Attrs().Get<float>("shift_ratio");
PADDLE_ENFORCE_GT(seg_num, 0, "Attr(seg_num) should be greater than 0.");
    PADDLE_ENFORCE(shift_ratio > 0 && shift_ratio < .5,
                   "Attr(shift_ratio) should be greater than 0 and less "
                   "than 0.5.");
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_EQ(
dim_x[0] % seg_num, 0,
"Input(X) dims[0] should be divided exactly by Attr(seg_num).");
}
ctx->SetOutputDim("Out", dim_x);
ctx->ShareLoD("X", "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.GetPlace());
}
};
class TemporalShiftOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input tensor of temporal shift operator. "
"This is a 4-D tensor with shape of [N*T, C, H, W]. "
"While N is the batch size, T is the temporal segment "
"number, C is the channel number, H is the height of "
"features and W is the width of features.");
AddOutput("Out",
"The output tensor of temporal shift operator. "
"This is a 4-D tensor in the same shape with Input(X).");
AddAttr<int>("seg_num",
"The temporal segment number, this should be a positive "
"integer.");
AddAttr<float>(
"shift_ratio",
"The shift ratio of the channels, the first :attr:`shift_ratio` part "
"of channels will be shifted by -1 along the temporal dimension, "
"and the second :attr:`shift_ratio` part of channels will be shifted "
"by 1 along the temporal dimension. Default 0.25.")
.SetDefault(0.25);
AddComment(R"DOC(
This operator calculates the temporal shifting features for Input(X).
Input(X) should be in shape of [N*T, C, H, W], where N is the batch
size, T is the temporal segment number specified by :attr:`seg_num`,
C is the channel number, and H and W are the height and width of features.
Temporal Shifting is calculated as follows:
Step 1: Reshape Input(X) to [N, T, C, H, W].
Step 2: Pad the reshaped result with zeros along the 2nd (T) dimension,
with a padding width of 1 on each side; the padded result will be in shape
of [N, T+2, C, H, W].
Step 3: Assume :attr:`shift_ratio` is :math:`1/4`, slice padding
result as follows:
$$
slice1 = x[:, :T, :C/4, :, :]
$$
$$
slice2 = x[:, 2:T+2, C/4:C/2, :, :]
$$
$$
slice3 = x[:, 1:T+1, C/2:, :, :]
$$
Step 4: Concatenate the three slices along the 3rd (C) dimension and
reshape the result to [N*T, C, H, W].
For details of temporal shifting, please refer to paper:
`Temporal Shift Module <http://arxiv.org/abs/1811.08383>`_ .
)DOC");
}
};
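A NumPy sketch, illustrative only and not part of this file, of the four steps described in the DOC block above (reshape, zero-pad T, slice three channel groups, concatenate, reshape back), assuming shift_ratio = 0.25 as in the example slices.
import numpy as np

def temporal_shift(x, seg_num, shift_ratio=0.25):
    nt, c, h, w = x.shape
    n = nt // seg_num
    c1 = int(c * shift_ratio)
    c2 = int(c * 2 * shift_ratio)
    y = x.reshape(n, seg_num, c, h, w)                        # Step 1
    y = np.pad(y, ((0, 0), (1, 1), (0, 0), (0, 0), (0, 0)))   # Step 2: zero-pad T
    slice1 = y[:, :seg_num, :c1]                              # reads frame t-1
    slice2 = y[:, 2:seg_num + 2, c1:c2]                       # reads frame t+1
    slice3 = y[:, 1:seg_num + 1, c2:]                         # reads frame t
    out = np.concatenate([slice1, slice2, slice3], axis=2)    # Steps 3 and 4
    return out.reshape(nt, c, h, w)

x = np.arange(8 * 8, dtype=np.float32).reshape(8, 8, 1, 1)    # N=2, T=4, C=8
print(temporal_shift(x, seg_num=4).shape)                     # (8, 8, 1, 1)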
class TemporalShiftOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto dim_x = ctx->GetInputDim("X");
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), dim_x);
}
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(temporal_shift, ops::TemporalShiftOp,
ops::TemporalShiftOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(temporal_shift_grad, ops::TemporalShiftOpGrad);
REGISTER_OP_CPU_KERNEL(temporal_shift, ops::TemporalShiftKernel<float>,
ops::TemporalShiftKernel<double>);
REGISTER_OP_CPU_KERNEL(temporal_shift_grad, ops::TemporalShiftGradKernel<float>,
ops::TemporalShiftGradKernel<double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/temporal_shift_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace paddle {
namespace operators {
using framework::Tensor;
template <typename T>
__global__ void KeTemporalShiftFw(const T* input, T* output, const int ntchw,
const int tchw, const int chw, const int hw,
const int w, const int t, const int c,
const float shift_ratio) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < ntchw; tid += stride) {
int in = tid / tchw;
int it = (tid % tchw) / chw;
int ic = (tid % chw) / hw;
int ih = (tid % hw) / w;
int iw = tid % w;
    const int c1 = static_cast<int>(c * shift_ratio);
    const int c2 = static_cast<int>(c * 2 * shift_ratio);
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output[tid] = 0;
} else {
int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w);
output[tid] = input[src_idx];
}
}
}
template <typename T>
__global__ void KeTemporalShiftBw(const T* output_grad, T* input_grad,
const int ntchw, const int tchw,
const int chw, const int hw, const int w,
const int t, const int c,
const float shift_ratio) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
int src_it = 0;
for (; tid < ntchw; tid += stride) {
int in = tid / tchw;
int it = (tid % tchw) / chw;
int ic = (tid % chw) / hw;
int ih = (tid % hw) / w;
int iw = tid % w;
    const int c1 = static_cast<int>(c * shift_ratio);
    const int c2 = static_cast<int>(c * 2 * shift_ratio);
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w);
input_grad[src_idx] = output_grad[tid];
}
}
}
template <typename T>
class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device.");
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
int t = ctx.Attr<int>("seg_num");
float shift_ratio = ctx.Attr<float>("shift_ratio");
const int nt = input->dims()[0];
const int c = input->dims()[1];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>({nt, c, h, w}, ctx.GetPlace());
int pixelNum = nt * chw;
int grid_dim = (pixelNum + 512 - 1) / 512;
grid_dim = grid_dim > 8 ? 8 : grid_dim;
KeTemporalShiftFw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
input_data, output_data, ntchw, tchw, chw, hw, w, t, c, shift_ratio);
}
};
template <typename T>
class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
int t = ctx.Attr<int>("seg_num");
float shift_ratio = ctx.Attr<float>("shift_ratio");
const int nt = output_grad->dims()[0];
const int c = output_grad->dims()[1];
const int h = output_grad->dims()[2];
const int w = output_grad->dims()[3];
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>({nt, c, h, w}, ctx.GetPlace());
math::SetConstant<platform::CUDADeviceContext, T>()(
ctx.template device_context<platform::CUDADeviceContext>(), input_grad,
static_cast<T>(0));
int pixelNum = nt * chw;
int grid_dim = (pixelNum + 512 - 1) / 512;
grid_dim = grid_dim > 8 ? 8 : grid_dim;
KeTemporalShiftBw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
output_grad_data, input_grad_data, ntchw, tchw, chw, hw, w, t, c,
shift_ratio);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(temporal_shift, ops::TemporalShiftOpCUDAKernel<float>,
ops::TemporalShiftOpCUDAKernel<double>);
REGISTER_OP_CUDA_KERNEL(temporal_shift_grad,
ops::TemporalShiftGradOpCUDAKernel<float>,
ops::TemporalShiftGradOpCUDAKernel<double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
static HOSTDEVICE inline int GetEntryIndex(int in, int it, int ic, int ih,
int iw, const int tchw,
const int chw, const int hw,
const int w) {
return in * tchw + it * chw + ic * hw + ih * w + iw;
}
template <typename T>
class TemporalShiftKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
int t = ctx.Attr<int>("seg_num");
float shift_ratio = ctx.Attr<float>("shift_ratio");
const int nt = input->dims()[0];
const int c = input->dims()[1];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>({nt, c, h, w}, ctx.GetPlace());
int src_it = 0;
for (int i = 0; i < output->numel(); i++) {
int in = i / tchw;
int it = (i % tchw) / chw;
int ic = (i % chw) / hw;
int ih = (i % hw) / w;
int iw = i % w;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it < 0 || src_it >= t) {
output_data[i] = 0;
} else {
int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w);
output_data[i] = input_data[src_idx];
}
}
}
};
template <typename T>
class TemporalShiftGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
int t = ctx.Attr<int>("seg_num");
float shift_ratio = ctx.Attr<float>("shift_ratio");
const int nt = output_grad->dims()[0];
const int c = output_grad->dims()[1];
const int h = output_grad->dims()[2];
const int w = output_grad->dims()[3];
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>({nt, c, h, w}, ctx.GetPlace());
memset(input_grad_data, 0, input_grad->numel() * sizeof(T));
int src_it = 0;
for (int i = 0; i < output_grad->numel(); i++) {
int in = i / tchw;
int it = (i % tchw) / chw;
int ic = (i % chw) / hw;
int ih = (i % hw) / w;
int iw = i % w;
if (ic < c1) {
src_it = it - 1;
} else if (ic < c2) {
src_it = it + 1;
} else {
src_it = it;
}
if (src_it >= 0 && src_it < t) {
int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w);
input_grad_data[src_idx] = output_grad_data[i];
}
}
}
};
} // namespace operators
} // namespace paddle
...@@ -52,6 +52,7 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -52,6 +52,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
std::string engine_key_; std::string engine_key_;
std::string engine_serialized_data_; std::string engine_serialized_data_;
bool calibration_mode_; bool calibration_mode_;
int device_id_;
public: public:
TensorRTEngineOp(const std::string &type, TensorRTEngineOp(const std::string &type,
...@@ -62,6 +63,7 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -62,6 +63,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
input_names_ = Inputs("Xs"); input_names_ = Inputs("Xs");
max_batch_size_ = Attr<int>("max_batch_size"); max_batch_size_ = Attr<int>("max_batch_size");
workspace_size_ = Attr<int>("workspace_size"); workspace_size_ = Attr<int>("workspace_size");
device_id_ = Attr<int>("gpu_id");
enable_int8_ = Attr<bool>("enable_int8"); enable_int8_ = Attr<bool>("enable_int8");
calibration_data_ = Attr<std::string>("calibration_data"); calibration_data_ = Attr<std::string>("calibration_data");
engine_key_ = Attr<std::string>("engine_key"); engine_key_ = Attr<std::string>("engine_key");
...@@ -79,6 +81,17 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -79,6 +81,17 @@ class TensorRTEngineOp : public framework::OperatorBase {
if (enable_int8_ && calibration_data_.size()) { if (enable_int8_ && calibration_data_.size()) {
calibrator_.reset(new TRTInt8Calibrator(calibration_data_)); calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
} }
if (!calibration_mode_ && !engine_serialized_data_.empty()) {
trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
device_id_));
PADDLE_ENFORCE(engine_serialized_data_.size(),
"TRT serialized data should not be empty here,"
"there must be error when generate serialized data in TRT "
"subgraph detect pass.");
trt_engine_->Deserialize(engine_serialized_data_);
}
} }
protected: protected:
...@@ -225,12 +238,8 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -225,12 +238,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
if (!trt_engine_) { if (!trt_engine_) {
trt_engine_.reset(new inference::tensorrt::TensorRTEngine( trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(), max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
boost::get<platform::CUDAPlace>(dev_place).device)); device_id_));
if (!engine_serialized_data_.empty()) { PrepareTRTEngine(scope, trt_engine_.get());
trt_engine_->Deserialize(engine_serialized_data_);
} else {
PrepareTRTEngine(scope, trt_engine_.get());
}
} }
return trt_engine_.get(); return trt_engine_.get();
} }
......
...@@ -108,6 +108,8 @@ TEST(TensorRTEngineOp, manual) { ...@@ -108,6 +108,8 @@ TEST(TensorRTEngineOp, manual) {
std::vector<std::string>({"z0"})); std::vector<std::string>({"z0"}));
engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString())); engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString()));
engine_op_desc.SetAttr("engine_serialized_data", std::string("")); engine_op_desc.SetAttr("engine_serialized_data", std::string(""));
int device_id = 0;
engine_op_desc.SetAttr("gpu_id", device_id);
LOG(INFO) << "create engine op"; LOG(INFO) << "create engine op";
auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc); auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...@@ -204,6 +206,8 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) { ...@@ -204,6 +206,8 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
std::vector<std::string>({"z3"})); std::vector<std::string>({"z3"}));
engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString())); engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString()));
engine_op_desc.SetAttr("engine_serialized_data", std::string("")); engine_op_desc.SetAttr("engine_serialized_data", std::string(""));
int device_id = 0;
engine_op_desc.SetAttr("gpu_id", device_id);
auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc); auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
......
...@@ -67,9 +67,11 @@ class CudnnCTCKernel : public framework::OpKernel<T> { ...@@ -67,9 +67,11 @@ class CudnnCTCKernel : public framework::OpKernel<T> {
softmax_logits.mutable_data<T>(logits->dims(), ctx.GetPlace()); softmax_logits.mutable_data<T>(logits->dims(), ctx.GetPlace());
softmax_logits.set_lod(logits_lod); softmax_logits.set_lod(logits_lod);
int rank = logits->dims().size(); int rank = logits->dims().size();
int axis_dim = logits->dims()[rank - 1];
Tensor in_2d = framework::ReshapeToMatrix(*logits, rank - 1); Tensor in_2d = framework::ReshapeToMatrix(*logits, rank - 1);
Tensor out_2d = framework::ReshapeToMatrix(softmax_logits, rank - 1); Tensor out_2d = framework::ReshapeToMatrix(softmax_logits, rank - 1);
math::SoftmaxFunctor<DeviceContext, T, false>()(dev_ctx, &in_2d, &out_2d); math::SoftmaxFunctor<DeviceContext, T, false>()(dev_ctx, axis_dim, &in_2d,
&out_2d);
// ctc needs sequences data stored in transposed padding format // ctc needs sequences data stored in transposed padding format
// logits and grad using padding data of layout 'TNC' // logits and grad using padding data of layout 'TNC'
......
...@@ -44,9 +44,12 @@ add_subdirectory(dynload) ...@@ -44,9 +44,12 @@ add_subdirectory(dynload)
cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce) cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce)
cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper) cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper)
set(dgc_deps "")
IF(WITH_GPU) IF(WITH_GPU)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader) set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
set(dgc_deps dgc) if(NOT WIN32)
set(dgc_deps dgc)
endif()
ELSE() ELSE()
set(dgc_deps) set(dgc_deps)
ENDIF() ENDIF()
......
...@@ -34,7 +34,7 @@ from . import io ...@@ -34,7 +34,7 @@ from . import io
from . import evaluator from . import evaluator
from . import initializer from . import initializer
from . import layers from . import layers
from . import imperative from . import dygraph
from . import contrib from . import contrib
from . import nets from . import nets
from . import optimizer from . import optimizer
...@@ -71,7 +71,7 @@ __all__ = framework.__all__ + executor.__all__ + \ ...@@ -71,7 +71,7 @@ __all__ = framework.__all__ + executor.__all__ + \
'initializer', 'initializer',
'layers', 'layers',
'contrib', 'contrib',
'imperative', 'dygraph',
'transpiler', 'transpiler',
'nets', 'nets',
'optimizer', 'optimizer',
...@@ -180,7 +180,7 @@ def __bootstrap__(): ...@@ -180,7 +180,7 @@ def __bootstrap__():
'cudnn_exhaustive_search', 'memory_optimize_debug', 'selected_gpus', 'cudnn_exhaustive_search', 'memory_optimize_debug', 'selected_gpus',
'sync_nccl_allreduce', 'limit_of_tmp_allocation', 'sync_nccl_allreduce', 'limit_of_tmp_allocation',
'times_excess_than_required_tmp_allocation', 'times_excess_than_required_tmp_allocation',
'enable_inplace_whitelist' 'enable_inplace_whitelist', 'cudnn_batchnorm_spatial_persistent'
] ]
core.init_gflags([sys.argv[0]] + core.init_gflags([sys.argv[0]] +
["--tryfromenv=" + ",".join(read_env_flags)]) ["--tryfromenv=" + ",".join(read_env_flags)])
......
...@@ -30,6 +30,8 @@ from . import slim ...@@ -30,6 +30,8 @@ from . import slim
from .slim import * from .slim import *
from . import utils from . import utils
from .utils import * from .utils import *
from . import extend_optimizer
from .extend_optimizer import *
__all__ = [] __all__ = []
__all__ += decoder.__all__ __all__ += decoder.__all__
...@@ -40,3 +42,4 @@ __all__ += int8_inference.__all__ ...@@ -40,3 +42,4 @@ __all__ += int8_inference.__all__
__all__ += reader.__all__ __all__ += reader.__all__
__all__ += slim.__all__ __all__ += slim.__all__
__all__ += utils.__all__ __all__ += utils.__all__
__all__ += extend_optimizer.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from . import extend_optimizer_with_weight_decay
from .extend_optimizer_with_weight_decay import *
__all__ = []
__all__ += extend_optimizer_with_weight_decay.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid
from paddle.fluid import framework as framework
__all__ = ["extend_with_decoupled_weight_decay"]
class DecoupledWeightDecay(object):
def __init__(self, coeff=0.0, apply_decay_param_fun=None, **kwargs):
if not isinstance(coeff, float) and \
not isinstance(coeff, framework.Variable):
raise TypeError("coeff should be float or Variable.")
self._params_name = set()
self._apply_decay_param_fun = apply_decay_param_fun
self._coeff = coeff
super(DecoupledWeightDecay, self).__init__(**kwargs)
def _scale_parameters(self, params_and_grads):
"""
Adds weight decay ops.
scaled_parameter = parameter * coeff
Args:
            params_and_grads: A list of (parameter, gradient) pairs for
                the parameters that weight decay should be applied to.
Raises:
Exception: The type of coeff and parameter is not consistent.
"""
if isinstance(self._coeff, float) and self._coeff == 0.0:
            return []
scaled_params = []
for param, grad in params_and_grads:
# If no gradient then we don't need to do anything
if grad is None:
continue
if self._apply_decay_param_fun is not None \
and not self._apply_decay_param_fun(param.name):
continue
if isinstance(self._coeff, float):
                assert param.dtype == paddle.fluid.core.VarDesc.VarType.FP32, \
                    "the type of coeff(float) and parameter(%s) is not consistent." % (param.dtype)
else:
assert self._coeff.dtype == param.dtype, \
"the type of coeff(%s) and parameter(%s) is not consistent."%(self._coeff.dtype, param.dtype)
with param.block.program._optimized_guard(
[param, grad]), framework.name_scope('weight decay'):
assert param.name not in self._params_name
scaled_params.append((param, grad, param * self._coeff))
self._params_name.add(param.name)
return scaled_params
def backward(self, **kargs):
return super(DecoupledWeightDecay, self).backward(**kargs)
def apply_optimize(self, **kargs):
return super(DecoupledWeightDecay, self).apply_optimize(**kargs)
def minimize(self,
loss,
startup_program=None,
parameter_list=None,
no_grad_set=None):
params_grads = self.backward(
loss=loss,
startup_program=startup_program,
parameter_list=parameter_list,
no_grad_set=no_grad_set)
scaled_params = self._scale_parameters(params_grads)
for p_grad_sgrad in scaled_params:
param, grad, scaled_param = p_grad_sgrad
with param.block.program._optimized_guard(
[param, grad]), framework.name_scope('weight decay'):
updated_param = paddle.fluid.layers.elementwise_sub(
x=param, y=scaled_param)
paddle.fluid.layers.assign(input=updated_param, output=param)
optimize_ops = self.apply_optimize(
loss=loss,
params_grads=params_grads,
startup_program=startup_program)
return optimize_ops, params_grads
def __str__(self):
return " ".join(["Weight Decay, params:", ",".join(self._params_name)])
def extend_with_decoupled_weight_decay(base_optimizer):
"""
    extend_with_decoupled_weight_decay is a decorator function that returns an
    optimizer class with decoupled weight decay. The returned optimizer applies
    weight decay to the optimized parameters using the parameter values from
    before the optimization step, i.e., new_parameter = optimized_parameter - parameter * coeff.
    For the details of decoupled weight decay, please refer to
`DECOUPLED WEIGHT DECAY REGULARIZATION <https://arxiv.org/pdf/1711.05101.pdf>`_.
Args:
base_optimizer (Optimizer): The base_optimizer should be a derived class of Optimizer.
Returns:
        OptimizerWithDecoupledWeightDecay: the optimizer with decoupled weight decay.
Examples:
.. code-block:: python
AdamW = fluid.contrib.extend_with_decoupled_weight_decay(
fluid.optimizer.Adam)
optimizer = AdamW(learning_rate=0.1,
weight_decay=0.01)
optimizer.minimize(cost)
"""
if not issubclass(base_optimizer, paddle.fluid.optimizer.Optimizer):
raise TypeError(
"The input(base_optimizer) should be a derived class of Optimizer.")
class OptimizerWithDecoupledWeightDecay(DecoupledWeightDecay,
base_optimizer):
"""
OptimizerWithDecoupledWeightDecay is used to update the optimized parameters
        with the parameter values from before optimization. For more information, please refer to:
https://arxiv.org/pdf/1711.05101.pdf.
Args:
weight_decay (float|Variable): The weight decay coefficient, it can be
float or Variable.
apply_decay_param_fun (function|None): If it is not None,
                only variables for which apply_decay_param_fun(variable) == True
                will be decayed. It only works when we want to specify variables.
Default: None.
"""
def __init__(self, weight_decay, apply_decay_param_fun=None, **kwargs):
super(OptimizerWithDecoupledWeightDecay, self).__init__(
weight_decay, apply_decay_param_fun, **kwargs)
return OptimizerWithDecoupledWeightDecay
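A conceptual NumPy sketch of the update rule stated in the docstring above, new_parameter = optimized_parameter - parameter * coeff; the plain-SGD inner step and the numbers are illustrative assumptions, not Paddle code.
import numpy as np

def sgd_with_decoupled_weight_decay(param, grad, lr=0.1, coeff=0.01):
    scaled = param * coeff           # decay term uses the pre-update parameter
    optimized = param - lr * grad    # base optimizer step (plain SGD here)
    return optimized - scaled        # new_parameter = optimized_parameter - parameter * coeff

p = np.array([1.0, -2.0])
g = np.array([0.5, 0.5])
print(sgd_with_decoupled_weight_decay(p, g))  # [ 0.94 -2.03]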
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Example:
>>from paddle.fluid.contrib.model_stat import summary
>>main_program = ...
>>summary(main_program)
+-----+------------+----------------+----------------+---------+------------+
| No. | TYPE | INPUT | OUTPUT | PARAMs | FLOPs |
+-----+------------+----------------+----------------+---------+------------+
| 0 | conv2d | (3, 200, 200) | (64, 100, 100) | 9408 | 188160000 |
| 1 | batch_norm | (64, 100, 100) | (64, 100, 100) | 256 | 640000 |
| 2 | relu | (64, 100, 100) | (64, 100, 100) | 0 | 640000 |
| 3 | pool2d | (64, 100, 100) | (64, 50, 50) | 0 | 1440000 |
...
| 176 | conv2d | (512, 7, 7) | (512, 7, 7) | 2359296 | 231211008 |
| 177 | relu | (512, 7, 7) | (512, 7, 7) | 0 | 25088 |
| 178 | conv2d | (512, 7, 7) | (2048, 7, 7) | 1048576 | 102760448 |
| 179 | relu | (2048, 7, 7) | (2048, 7, 7) | 0 | 100352 |
| 180 | pool2d | (2048, 7, 7) | (2048, 1, 1) | 0 | 100352 |
+-----+------------+----------------+----------------+---------+------------+
Total PARAMs: 48017344(0.0480G)
Total FLOPs: 11692747751(11.69G)
'''
from collections import OrderedDict
from prettytable import PrettyTable
def summary(main_prog):
'''
    It summarizes the model's PARAMs and FLOPs.
    It supports common operators like conv, fc, pool, relu, sigmoid, bn, etc.
    Args:
        main_prog: main program
    Returns:
        None, the summary table is printed to the terminal
'''
collected_ops_list = []
for one_b in main_prog.blocks:
block_vars = one_b.vars
for one_op in one_b.ops:
op_info = OrderedDict()
spf_res = _summary_model(block_vars, one_op)
if spf_res is None:
continue
# TODO: get the operator name
op_info['type'] = one_op.type
op_info['input_shape'] = spf_res[0][1:]
op_info['out_shape'] = spf_res[1][1:]
op_info['PARAMs'] = spf_res[2]
op_info['FLOPs'] = spf_res[3]
collected_ops_list.append(op_info)
summary_table, total = _format_summary(collected_ops_list)
_print_summary(summary_table, total)
def _summary_model(block_vars, one_op):
'''
Compute operator's params and flops.
Args:
block_vars: all vars of one block
one_op: one operator to count
Returns:
in_data_shape: one operator's input data shape
out_data_shape: one operator's output data shape
params: one operator's PARAMs
        flops: one operator's FLOPs
'''
if one_op.type in ['conv2d', 'depthwise_conv2d']:
k_arg_shape = block_vars[one_op.input("Filter")[0]].shape
in_data_shape = block_vars[one_op.input("Input")[0]].shape
out_data_shape = block_vars[one_op.output("Output")[0]].shape
c_out, c_in, k_h, k_w = k_arg_shape
_, c_out_, h_out, w_out = out_data_shape
assert c_out == c_out_, 'shape error!'
k_groups = one_op.attr("groups")
kernel_ops = k_h * k_w * (c_in / k_groups)
bias_ops = 0 if one_op.input("Bias") == [] else 1
params = c_out * (kernel_ops + bias_ops)
flops = h_out * w_out * c_out * (kernel_ops + bias_ops)
# base nvidia paper, include mul and add
flops = 2 * flops
elif one_op.type == 'pool2d':
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
_, c_out, h_out, w_out = out_data_shape
k_size = one_op.attr("ksize")
params = 0
flops = h_out * w_out * c_out * (k_size[0] * k_size[1])
elif one_op.type == 'mul':
k_arg_shape = block_vars[one_op.input("Y")[0]].shape
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
# TODO: fc has mul ops
# add attr to mul op, tell us whether it belongs to 'fc'
        # this is not the best way
if 'fc' not in one_op.output("Out")[0]:
return None
k_in, k_out = k_arg_shape
# bias in sum op
params = k_in * k_out + 1
flops = k_in * k_out
elif one_op.type in ['sigmoid', 'tanh', 'relu', 'leaky_relu', 'prelu']:
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Out")[0]].shape
params = 0
if one_op.type == 'prelu':
params = 1
flops = 1
for one_dim in in_data_shape:
flops *= one_dim
elif one_op.type == 'batch_norm':
in_data_shape = block_vars[one_op.input("X")[0]].shape
out_data_shape = block_vars[one_op.output("Y")[0]].shape
_, c_in, h_out, w_out = in_data_shape
# gamma, beta
params = c_in * 2
# compute mean and std
flops = h_out * w_out * c_in * 2
else:
return None
return in_data_shape, out_data_shape, params, flops
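A worked check of the conv2d accounting above against row 0 of the example table in this file's docstring (conv2d, (3, 200, 200) -> (64, 100, 100), PARAMs 9408, FLOPs 188160000); a 7x7 kernel with groups=1 and no bias is implied by 9408 = 64 * 3 * 7 * 7.
c_in, c_out, k_h, k_w, groups, bias_ops = 3, 64, 7, 7, 1, 0
h_out, w_out = 100, 100
kernel_ops = k_h * k_w * (c_in / groups)
params = c_out * (kernel_ops + bias_ops)                     # 9408
flops = 2 * h_out * w_out * c_out * (kernel_ops + bias_ops)  # 188160000 (mul + add)
print(int(params), int(flops))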
def _format_summary(collected_ops_list):
'''
Format summary report.
Args:
collected_ops_list: the collected operator with summary
Returns:
summary_table: summary report format
        total: dict with the total PARAMs and FLOPs
'''
summary_table = PrettyTable(
["No.", "TYPE", "INPUT", "OUTPUT", "PARAMs", "FLOPs"])
summary_table.align = 'r'
total = {}
total_params = []
total_flops = []
for i, one_op in enumerate(collected_ops_list):
# notice the order
table_row = [
i,
one_op['type'],
one_op['input_shape'],
one_op['out_shape'],
int(one_op['PARAMs']),
int(one_op['FLOPs']),
]
summary_table.add_row(table_row)
total_params.append(int(one_op['PARAMs']))
total_flops.append(int(one_op['FLOPs']))
total['params'] = total_params
total['flops'] = total_flops
return summary_table, total
def _print_summary(summary_table, total):
'''
Print all the summary on terminal.
Args:
summary_table: summary report format
        total: dict with the total PARAMs and FLOPs
'''
    params = total['params']
    flops = total['flops']
    print(summary_table)
    print('Total PARAMs: {}({:.4f}M)'.format(
        sum(params), sum(params) / (10**6)))
print('Total FLOPs: {}({:.2f}G)'.format(sum(flops), sum(flops) / 10**9))
print(
"Notice: \n now supported ops include [Conv, DepthwiseConv, FC(mul), BatchNorm, Pool, Activation(sigmoid, tanh, relu, leaky_relu, prelu)]"
)
...@@ -19,7 +19,7 @@ from .... import Program ...@@ -19,7 +19,7 @@ from .... import Program
from .... import program_guard from .... import program_guard
from .... import regularizer from .... import regularizer
__all__ = ['FSPDistiller', 'L2Distiller'] __all__ = ['FSPDistiller', 'L2Distiller', 'SoftLabelDistiller']
class L2Distiller(object): class L2Distiller(object):
...@@ -186,3 +186,91 @@ class FSPDistillerPass(object): ...@@ -186,3 +186,91 @@ class FSPDistillerPass(object):
def _fsp_matrix(self, fea_map_0, fea_map_1): def _fsp_matrix(self, fea_map_0, fea_map_1):
return layers.fsp_matrix(fea_map_0, fea_map_1) return layers.fsp_matrix(fea_map_0, fea_map_1)
class SoftLabelDistiller(object):
"""
    Combine a layer from the student net and a layer from the teacher net with a
    softmax_with_cross_entropy loss, and add this loss to the total loss used for
    distillation training.
"""
def __init__(self,
student_feature_map=None,
teacher_feature_map=None,
student_temperature=1.0,
teacher_temperature=1.0,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
                                      Its shape should be the same as that of the student feature map.
            student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy. default: 1.0
            teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy. default: 1.0
            distillation_loss_weight(float): The weight of the soft-label distillation loss. default: 1.0
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.distillation_loss_weight = distillation_loss_weight
self.student_temperature = student_temperature
self.teacher_temperature = teacher_temperature
def distiller_loss(self, graph):
"""
        Modify the graph in place to add the softmax_with_cross_entropy loss.
Args:
graph(GraphWrapper): The graph to be modified.
Returns:
GraphWrapper: The modified graph.
"""
distiller_pass = SoftLabelDistillerPass(
self.student_feature_map, self.teacher_feature_map,
self.student_temperature, self.teacher_temperature,
self.distillation_loss_weight)
dis_graph = distiller_pass.apply(graph)
return dis_graph
class SoftLabelDistillerPass(object):
def __init__(self,
student_feature_map,
teacher_feature_map,
student_temperature,
teacher_temperature,
distillation_loss_weight=1):
"""
Args:
student_feature_map(str): The name of feature map from student network.
teacher_feature_map(str): The name of feature map from teacher network.
                                      Its shape should be the same as that of the student feature map.
            student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy.
            teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy.
            distillation_loss_weight(float): The weight of the soft-label distillation loss.
"""
self.student_feature_map = student_feature_map
self.teacher_feature_map = teacher_feature_map
self.student_temperature = student_temperature
self.teacher_temperature = teacher_temperature
self.distillation_loss_weight = distillation_loss_weight
def apply(self, graph):
ret_graph = graph
with program_guard(ret_graph.program):
student_feature_map = ret_graph.var(self.student_feature_map)._var
teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var
s_fea = student_feature_map / self.student_temperature
            t_fea = teacher_feature_map / self.teacher_temperature
t_fea.stop_gradient = True
ce_loss = layers.softmax_with_cross_entropy(
s_fea, t_fea, soft_label=True)
distillation_loss = ce_loss * self.distillation_loss_weight
student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
loss = distillation_loss + student_loss
ret_graph.out_nodes[
'soft_label_loss_' + self.student_feature_map + "_" +
self.teacher_feature_map] = distillation_loss.name
ret_graph.out_nodes['loss'] = loss.name
return ret_graph
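A NumPy sketch, illustrative only, of the loss assembled above: temperature-scaled student and teacher features, a soft-label cross entropy, and the distillation_loss_weight scale. Normalizing the scaled teacher features into a softmax distribution here is a simplifying assumption of the sketch, not something the graph code above does explicitly.
import numpy as np

def soft_label_distill_loss(student, teacher, t_s=1.0, t_t=1.0, weight=0.001):
    s = student / t_s                                   # student_temperature
    t = teacher / t_t                                   # teacher_temperature
    p_s = np.exp(s - s.max(-1, keepdims=True))
    p_s /= p_s.sum(-1, keepdims=True)
    p_t = np.exp(t - t.max(-1, keepdims=True))          # teacher soft labels (assumed softmax)
    p_t /= p_t.sum(-1, keepdims=True)
    ce = -(p_t * np.log(p_s)).sum(-1).mean()            # soft-label cross entropy
    return weight * ce                                  # distillation_loss_weight

student = np.random.randn(4, 10)
teacher = np.random.randn(4, 10)
print(soft_label_distill_loss(student, teacher))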
...@@ -204,6 +204,10 @@ class GraphWrapper(object): ...@@ -204,6 +204,10 @@ class GraphWrapper(object):
""" """
super(GraphWrapper, self).__init__() super(GraphWrapper, self).__init__()
self.program = Program() if program is None else program self.program = Program() if program is None else program
self.persistables = {}
for var in self.program.list_vars():
if var.persistable:
self.persistables[var.name] = var
self.compiled_graph = None self.compiled_graph = None
self.in_nodes = OrderedDict(in_nodes) self.in_nodes = OrderedDict(in_nodes)
self.out_nodes = OrderedDict(out_nodes) self.out_nodes = OrderedDict(out_nodes)
...@@ -467,7 +471,12 @@ class GraphWrapper(object): ...@@ -467,7 +471,12 @@ class GraphWrapper(object):
path(str): The path to save the persistables. path(str): The path to save the persistables.
exe(framework.Executor): The executor used to save the persistables. exe(framework.Executor): The executor used to save the persistables.
""" """
io.save_persistables(exe.exe, path, main_program=self.program) # update persistables from program
for var in self.program.list_vars():
if var.persistable and var.name not in self.persistables:
self.persistables[var.name] = var
io.save_vars(exe.exe, path, vars=self.persistables.values())
def load_persistables(self, path, exe): def load_persistables(self, path, exe):
""" """
...@@ -481,7 +490,7 @@ class GraphWrapper(object): ...@@ -481,7 +490,7 @@ class GraphWrapper(object):
return os.path.exists(os.path.join(path, var.name)) return os.path.exists(os.path.join(path, var.name))
io.load_vars( io.load_vars(
exe.exe, path, main_program=self.program, predicate=if_exist) exe.exe, path, vars=self.persistables.values(), predicate=if_exist)
def update_param_shape(self, scope): def update_param_shape(self, scope):
""" """
......
...@@ -20,7 +20,7 @@ from .... import io ...@@ -20,7 +20,7 @@ from .... import io
from .... import core from .... import core
from ....compiler import CompiledProgram from ....compiler import CompiledProgram
from ....compiler import BuildStrategy from ....compiler import BuildStrategy
from ....framework import IrGraph from ....framework import IrGraph, Variable, Program
from ..core.strategy import Strategy from ..core.strategy import Strategy
from .quantization_pass import * from .quantization_pass import *
...@@ -88,41 +88,76 @@ class QuantizationStrategy(Strategy): ...@@ -88,41 +88,76 @@ class QuantizationStrategy(Strategy):
self.save_out_nodes = save_out_nodes self.save_out_nodes = save_out_nodes
self.save_in_nodes = save_in_nodes self.save_in_nodes = save_in_nodes
def on_compression_begin(self, context):
"""
        Restore the graph when the compression task is initialized from a checkpoint.
"""
        # It is initialized from a checkpoint and has missed the start epoch.
if context.epoch_id != 0 and context.epoch_id > self.start_epoch:
_logger.info("Restore quantization task from checkpoint")
self._modify_graph_for_quantization(context)
_logger.info("Finish restoring quantization task from checkpoint")
def _modify_graph_for_quantization(self, context):
"""
        Insert fake_quantize_op and fake_dequantize_op before training and testing.
"""
train_ir_graph = IrGraph(
core.Graph(context.optimize_graph.program.clone().desc),
for_test=False)
test_ir_graph = IrGraph(
core.Graph(context.eval_graph.program.clone().desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=context.scope,
place=context.place,
weight_bits=self.weight_bits,
activation_bits=self.activation_bits,
activation_quantize_type=self.activation_quantize_type,
weight_quantize_type=self.weight_quantize_type)
transform_pass.apply(train_ir_graph)
transform_pass.apply(test_ir_graph)
# Put persistables created by transform_pass into context.optimize_graph.persistables
# for saving checkpoint.
program_persistables = set()
for var in context.optimize_graph.program.list_vars():
if var.persistable:
program_persistables.add(var.name)
program = Program()
for var_node in train_ir_graph.all_persistable_nodes():
if var_node.name() not in program_persistables:
var_desc = var_node.var()
var = program.global_block().create_var(
name=var_node.name(),
shape=var_desc.shape(),
dtype=var_desc.dtype(),
type=var_desc.type(),
lod_level=var_desc.lod_level())
context.optimize_graph.persistables[var.name] = var
build_strategy = BuildStrategy()
build_strategy.enable_inplace = False
build_strategy.memory_optimize = False
# for quantization training
context.optimize_graph.compiled_graph = CompiledProgram(
train_ir_graph.graph).with_data_parallel(
loss_name=context.optimize_graph.out_nodes['loss'],
build_strategy=build_strategy)
# for evaluation. And program compiled from ir graph must be with data parallel.
context.eval_graph.compiled_graph = CompiledProgram(
test_ir_graph.graph).with_data_parallel(
build_strategy=build_strategy)
# for saving inference model after training
context.put('quantization_test_ir_graph_backup', test_ir_graph)
def on_epoch_begin(self, context): def on_epoch_begin(self, context):
""" """
        Insert fake_quantize_op and fake_dequantize_op before training and testing.         Insert fake_quantize_op and fake_dequantize_op before training and testing.
""" """
super(QuantizationStrategy, self).on_compression_begin(context) super(QuantizationStrategy, self).on_epoch_begin(context)
if self.start_epoch == context.epoch_id: if self.start_epoch == context.epoch_id:
_logger.info('QuantizationStrategy::on_epoch_begin') _logger.info('QuantizationStrategy::on_epoch_begin')
train_ir_graph = IrGraph( self._modify_graph_for_quantization(context)
core.Graph(context.optimize_graph.program.desc), for_test=False)
test_ir_graph = IrGraph(
core.Graph(context.eval_graph.program.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=context.scope,
place=context.place,
weight_bits=self.weight_bits,
activation_bits=self.activation_bits,
activation_quantize_type=self.activation_quantize_type,
weight_quantize_type=self.weight_quantize_type)
transform_pass.apply(train_ir_graph)
transform_pass.apply(test_ir_graph)
build_strategy = BuildStrategy()
build_strategy.enable_inplace = False
build_strategy.memory_optimize = False
# for quantization training
context.optimize_graph.compiled_graph = CompiledProgram(
train_ir_graph.graph).with_data_parallel(
loss_name=context.optimize_graph.out_nodes['loss'],
build_strategy=build_strategy)
# for evaluation. And program compiled from ir graph must be with data parallel.
context.eval_graph.compiled_graph = CompiledProgram(
test_ir_graph.graph).with_data_parallel(
build_strategy=build_strategy)
# for saving inference model after training
context.put('quantization_test_ir_graph_backup', test_ir_graph)
_logger.info('Finish QuantizationStrategy::on_epoch_begin') _logger.info('Finish QuantizationStrategy::on_epoch_begin')
def on_epoch_end(self, context): def on_epoch_end(self, context):
......
...@@ -33,10 +33,17 @@ distillers: ...@@ -33,10 +33,17 @@ distillers:
teacher_feature_map: 'teacher.tmp_2' teacher_feature_map: 'teacher.tmp_2'
student_feature_map: 'student.tmp_2' student_feature_map: 'student.tmp_2'
distillation_loss_weight: 1 distillation_loss_weight: 1
soft_label_distiller:
class: 'SoftLabelDistiller'
student_temperature: 1.0
teacher_temperature: 1.0
teacher_feature_map: 'teacher.tmp_1'
student_feature_map: 'student.tmp_1'
distillation_loss_weight: 0.001
strategies: strategies:
distillation_strategy: distillation_strategy:
class: 'DistillationStrategy' class: 'DistillationStrategy'
distillers: ['fsp_distiller', 'l2_distiller'] distillers: ['fsp_distiller', 'l2_distiller', 'soft_label_distiller']
start_epoch: 0 start_epoch: 0
end_epoch: 1 end_epoch: 1
compressor: compressor:
......
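The new soft_label_distiller entry pairs temperature-softened teacher and student outputs. As a rough NumPy sketch of the usual soft-label distillation loss (an assumption based on the standard knowledge-distillation recipe, not the exact SoftLabelDistiller formula):

import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def soft_label_loss(student_logits, teacher_logits,
                    student_temperature=1.0, teacher_temperature=1.0):
    # Cross-entropy between temperature-softened teacher and student distributions.
    t = softmax(teacher_logits / teacher_temperature)
    s = softmax(student_logits / student_temperature)
    return float(np.mean(-np.sum(t * np.log(s + 1e-10), axis=-1)))

student = np.random.randn(4, 10)
teacher = np.random.randn(4, 10)
print(soft_label_loss(student, teacher, 1.0, 1.0))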
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from functools import partial
import numpy as np
import paddle
import paddle.fluid as fluid
import contextlib
def get_places():
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
return places
@contextlib.contextmanager
def prog_scope_guard(main_prog, startup_prog):
scope = fluid.core.Scope()
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
with fluid.program_guard(main_prog, startup_prog):
yield
def bow_net(data,
label,
dict_dim,
is_sparse=False,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb = fluid.layers.embedding(
input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
class TestWeightDecay(unittest.TestCase):
def setUp(self):
self.word_dict = paddle.dataset.imdb.word_dict()
reader = paddle.batch(
paddle.dataset.imdb.train(self.word_dict), batch_size=2)()
self.train_data = [next(reader) for _ in range(5)]
self.learning_rate = .5
def run_program(self, place, feed_list):
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe.run(fluid.default_startup_program())
main_prog = fluid.default_main_program()
param_list = [var.name for var in main_prog.block(0).all_parameters()]
param_sum = []
for data in self.train_data:
out = exe.run(main_prog,
feed=feeder.feed(data),
fetch_list=param_list)
p_sum = 0
for v in out:
p_sum += np.sum(np.abs(v))
param_sum.append(p_sum)
return param_sum
def check_weight_decay(self, place, model):
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
startup_prog.random_seed = 1
with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = model(data, label, len(self.word_dict))
AdamW = fluid.contrib.extend_with_decoupled_weight_decay(
fluid.optimizer.Adam)
optimizer = AdamW(
learning_rate=self.learning_rate,
weight_decay=self.learning_rate)
optimizer.minimize(avg_cost)
param_sum = self.run_program(place, [data, label])
return param_sum
def check_weight_decay2(self, place, model):
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
startup_prog.random_seed = 1
with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = model(data, label, len(self.word_dict))
param_list = [(var, var * self.learning_rate)
for var in main_prog.block(0).all_parameters()]
optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate)
optimizer.minimize(avg_cost)
for params in param_list:
updated_p = fluid.layers.elementwise_sub(
x=params[0], y=params[1])
fluid.layers.assign(input=updated_p, output=params[0])
param_sum = self.run_program(place, [data, label])
return param_sum
def test_weight_decay(self):
for place in get_places():
model = partial(bow_net, is_sparse=False)
param_sum1 = self.check_weight_decay(place, model)
param_sum2 = self.check_weight_decay2(place, model)
for i in range(len(param_sum1)):
assert np.isclose(a=param_sum1[i], b=param_sum2[i], rtol=5e-5)
if __name__ == '__main__':
unittest.main()
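For reference, a small NumPy sketch (using plain SGD rather than Adam, with illustrative names) of the distinction the test above probes: decoupled weight decay applies the shrink directly to the parameters instead of folding it into the gradient. For vanilla SGD the two forms coincide, but for adaptive optimizers such as Adam they differ, which is why the test compares the extended AdamW optimizer against plain Adam plus an explicit parameter shrink.

import numpy as np

def sgd_with_coupled_l2(param, grad, lr, decay):
    # L2 regularization folded into the gradient.
    return param - lr * (grad + decay * param)

def sgd_with_decoupled_decay(param, grad, lr, decay):
    # Gradient step first, then an explicit shrink of the weights.
    return param - lr * grad - lr * decay * param

p = np.array([1.0, -2.0])
g = np.array([0.1, 0.3])
print(sgd_with_coupled_l2(p, g, lr=0.5, decay=0.5))
print(sgd_with_decoupled_decay(p, g, lr=0.5, decay=0.5))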
...@@ -32,6 +32,9 @@ from .profiler import * ...@@ -32,6 +32,9 @@ from .profiler import *
from . import checkpoint from . import checkpoint
from .checkpoint import * from .checkpoint import *
from . import learning_rate_scheduler
from .learning_rate_scheduler import *
__all__ = [] __all__ = []
__all__ += layers.__all__ __all__ += layers.__all__
__all__ += base.__all__ __all__ += base.__all__
...@@ -39,3 +42,4 @@ __all__ += nn.__all__ ...@@ -39,3 +42,4 @@ __all__ += nn.__all__
__all__ += tracer.__all__ __all__ += tracer.__all__
__all__ += profiler.__all__ __all__ += profiler.__all__
__all__ += checkpoint.__all__ __all__ += checkpoint.__all__
__all__ += learning_rate_scheduler.__all__
...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable'] ...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def enabled(): def enabled():
return framework._in_imperative_mode() return framework._in_dygraph_mode()
@signature_safe_contextmanager @signature_safe_contextmanager
...@@ -39,14 +39,14 @@ def guard(place=None): ...@@ -39,14 +39,14 @@ def guard(place=None):
with framework.program_guard(train, startup): with framework.program_guard(train, startup):
with framework.unique_name.guard(): with framework.unique_name.guard():
with framework._imperative_guard(tracer): with framework._dygraph_guard(tracer):
with framework._imperative_place_guard(place): with framework._dygraph_place_guard(place):
yield yield
def to_variable(value, block=None, name=None): def to_variable(value, block=None, name=None):
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
assert enabled(), "to_variable could only be called in imperative mode" assert enabled(), "to_variable could only be called in dygraph mode"
if not block: if not block:
block = framework.default_main_program().current_block() block = framework.default_main_program().current_block()
......
...@@ -68,7 +68,7 @@ def save_persistables(vardict, dirname, filename=None): ...@@ -68,7 +68,7 @@ def save_persistables(vardict, dirname, filename=None):
dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
init_cell) init_cell)
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
fluid.imperative.checkpoint.save_persistables(ptb_model.state_dict(), dirname=param_path, fluid.dygraph.save_persistables(ptb_model.state_dict(), dirname=param_path,
layer=ptb_model) layer=ptb_model)
""" """
if isinstance(vardict, collections.OrderedDict): if isinstance(vardict, collections.OrderedDict):
...@@ -97,17 +97,17 @@ def load_persistables(vardict, dirname, filename=None): ...@@ -97,17 +97,17 @@ def load_persistables(vardict, dirname, filename=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
my_layer = layer(fluid.imperative.Layer) my_layer = layer(fluid.dygraph.Layer)
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
param_dict = fluid.imperative.checkpoint.load_persistables(my_layer.parameters(), param_path) param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1'] param_1 = param_dict['PtbModel_0.w_1']
or: or:
my_layer = layer(fluid.imperative.Layer) my_layer = layer(fluid.dygraph.Layer)
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
filename = "model.file" filename = "model.file"
param_dict = fluid.imperative.checkpoint.load_persistables(my_layer.state_dict(), param_path, param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path,
filename=filename) filename=filename)
param_1 = param_dict['PtbModel_0.w_1'] param_1 = param_dict['PtbModel_0.w_1']
......
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import copy import copy
import six import six
from ..framework import Parameter, _in_imperative_mode from ..framework import Parameter, _in_dygraph_mode
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .. import core from .. import core
from six.moves import zip from six.moves import zip
......
...@@ -283,7 +283,7 @@ class PyLayer(core.PyLayer): ...@@ -283,7 +283,7 @@ class PyLayer(core.PyLayer):
@classmethod @classmethod
def __call__(cls, *inputs): def __call__(cls, *inputs):
tracer = framework._imperative_tracer() tracer = framework._dygraph_tracer()
block = framework.default_main_program().current_block() block = framework.default_main_program().current_block()
ivar_inputs = [x._ivar for x in inputs] ivar_inputs = [x._ivar for x in inputs]
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
from .. import unique_name
__all__ = [
'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', 'ExponentialDecay',
'InverseTimeDecay', 'PolynomialDecay', 'CosineDecay'
]
class LearningRateDecay(object):
"""
Base class of learning rate decay
"""
def __init__(self, begin=0, step=1, dtype='float32'):
self.step_num = begin
self.step_size = step
self.dtype = dtype
def __call__(self):
lr = self.step()
if isinstance(lr, float):
lr = self.create_lr_var(lr)
self.step_num += self.step_size
return lr
def create_lr_var(self, lr):
from .. import layers
lr = layers.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(lr),
dtype=self.dtype,
persistable=True)
return lr
def step(self):
raise NotImplementedError()
class PiecewiseDecay(LearningRateDecay):
def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
super(PiecewiseDecay, self).__init__(begin, step, dtype)
self.boundaries = boundaries
self.values = values
self.vars = []
for value in values:
self.vars.append(self.create_lr_var(value))
def step(self):
for i in range(len(self.boundaries)):
if self.step_num < self.boundaries[i]:
return self.vars[i]
return self.vars[len(self.values) - 1]
class NaturalExpDecay(LearningRateDecay):
def __init__(self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32'):
super(NaturalExpDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase
def step(self):
from .. import layers
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = layers.floor(div_res)
decayed_lr = self.learning_rate * layers.exp(-1 * self.decay_rate *
div_res)
return decayed_lr
class ExponentialDecay(LearningRateDecay):
def __init__(self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32'):
super(ExponentialDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase
def step(self):
from .. import layers
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = layers.floor(div_res)
decayed_lr = self.learning_rate * (self.decay_rate**div_res)
return decayed_lr
class InverseTimeDecay(LearningRateDecay):
def __init__(self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32'):
super(InverseTimeDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase
def step(self):
from .. import layers
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = layers.floor(div_res)
decayed_lr = self.learning_rate / (1 + self.decay_rate * div_res)
return decayed_lr
class PolynomialDecay(LearningRateDecay):
def __init__(self,
learning_rate,
decay_steps,
end_learning_rate=0.0001,
power=1.0,
cycle=False,
begin=0,
step=1,
dtype='float32'):
super(PolynomialDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.end_learning_rate = end_learning_rate
self.power = power
self.cycle = cycle
def step(self):
from .. import layers
tmp_step_num = self.step_num
tmp_decay_steps = self.decay_steps
if self.cycle:
div_res = layers.ceil(
self.create_lr_var(tmp_step_num / float(self.decay_steps)))
if tmp_step_num == 0:
div_res = self.create_lr_var(1.0)
tmp_decay_steps = self.decay_steps * div_res
else:
tmp_step_num = self.create_lr_var(tmp_step_num
if tmp_step_num < self.decay_steps
else self.decay_steps)
decayed_lr = (self.learning_rate - self.end_learning_rate) * \
((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
return decayed_lr
class CosineDecay(LearningRateDecay):
def __init__(self,
learning_rate,
step_each_epoch,
epochs,
begin=0,
step=1,
dtype='float32'):
super(CosineDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.step_each_epoch = step_each_epoch
self.epochs = epochs
def step(self):
from .. import layers
cur_epoch = layers.floor(
self.create_lr_var(self.step_num / self.step_each_epoch))
decayed_lr = self.learning_rate * 0.5 * (
layers.cos(cur_epoch * math.pi / self.epochs) + 1)
return decayed_lr
class NoamDecay(LearningRateDecay):
def __init__(self, d_model, warmup_steps, begin=1, step=1, dtype='float32'):
super(NoamDecay, self).__init__(begin, step, dtype)
self.d_model = d_model
self.warmup_steps = warmup_steps
def step(self):
from .. import layers
a = self.create_lr_var(self.step_num**-0.5)
b = self.create_lr_var((self.warmup_steps**-1.5) * self.step_num)
lr_value = (self.d_model**-0.5) * layers.elementwise_min(a, b)
return lr_value
...@@ -133,7 +133,7 @@ class Conv2D(layers.Layer): ...@@ -133,7 +133,7 @@ class Conv2D(layers.Layer):
outputs={'Out': [pre_act]}, outputs={'Out': [pre_act]},
attrs={'axis': 1}) attrs={'axis': 1})
# Currently, we don't support inplace in imperative mode # Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_act, act=self._act) return self._helper.append_activation(pre_act, act=self._act)
...@@ -265,7 +265,7 @@ class FC(layers.Layer): ...@@ -265,7 +265,7 @@ class FC(layers.Layer):
attrs={'axis': self._num_flatten_dims}) attrs={'axis': self._num_flatten_dims})
else: else:
pre_activation = pre_bias pre_activation = pre_bias
# Currently, we don't support inplace in imperative mode # Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act) return self._helper.append_activation(pre_activation, act=self._act)
...@@ -387,7 +387,7 @@ class BatchNorm(layers.Layer): ...@@ -387,7 +387,7 @@ class BatchNorm(layers.Layer):
"use_global_stats": self._use_global_stats "use_global_stats": self._use_global_stats
}) })
# Currently, we don't support inplace in imperative mode # Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(batch_norm_out, self._act) return self._helper.append_activation(batch_norm_out, self._act)
...@@ -426,7 +426,7 @@ class Embedding(layers.Layer): ...@@ -426,7 +426,7 @@ class Embedding(layers.Layer):
dict_size = len(dataset.ids) dict_size = len(dataset.ids)
input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
embedding = fluid.imperative.Embedding(size=[dict_size, 16]) embedding = fluid.dygraph.Embedding(size=[dict_size, 16])
fc = embedding(input) fc = embedding(input)
""" """
......
...@@ -24,12 +24,12 @@ __all__ = ['Tracer'] ...@@ -24,12 +24,12 @@ __all__ = ['Tracer']
def release_op(op): def release_op(op):
del framework._imperative_tracer()._ops[op._trace_id] del framework._dygraph_tracer()._ops[op._trace_id]
class Tracer(core.Tracer): class Tracer(core.Tracer):
""" """
Python wrapper of imperative tracer Python wrapper of dygraph tracer
""" """
def __init__(self, block): def __init__(self, block):
......
...@@ -75,20 +75,20 @@ GRAD_VAR_SUFFIX = core.kGradVarSuffix() ...@@ -75,20 +75,20 @@ GRAD_VAR_SUFFIX = core.kGradVarSuffix()
ZERO_VAR_SUFFIX = core.kZeroVarSuffix() ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName() CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName()
_imperative_tracer_ = None _dygraph_tracer_ = None
_imperative_current_expected_place_ = None _dygraph_current_expected_place_ = None
def _in_imperative_mode(): def _in_dygraph_mode():
return _imperative_tracer_ is not None return _dygraph_tracer_ is not None
def _imperative_tracer(): def _dygraph_tracer():
return _imperative_tracer_ return _dygraph_tracer_
def _current_expected_place(): def _current_expected_place():
return _imperative_current_expected_place_ return _dygraph_current_expected_place_
def _cpu_num(): def _cpu_num():
...@@ -396,7 +396,7 @@ class Variable(object): ...@@ -396,7 +396,7 @@ class Variable(object):
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
if _in_imperative_mode(): if _in_dygraph_mode():
# record vars in tracer rather than blocks # record vars in tracer rather than blocks
self._ivar = kwargs.get("ivar", None) self._ivar = kwargs.get("ivar", None)
if not self._ivar: if not self._ivar:
...@@ -406,7 +406,7 @@ class Variable(object): ...@@ -406,7 +406,7 @@ class Variable(object):
_current_expected_place(), stop_gradient, True _current_expected_place(), stop_gradient, True
if persistable else False) if persistable else False)
if persistable: if persistable:
_imperative_tracer().trace_var(name, self) _dygraph_tracer().trace_var(name, self)
else: else:
self.error_clip = error_clip self.error_clip = error_clip
...@@ -515,8 +515,8 @@ class Variable(object): ...@@ -515,8 +515,8 @@ class Variable(object):
Returns: Returns:
str: The debug string. str: The debug string.
""" """
if _in_imperative_mode(): if _in_dygraph_mode():
# TODO(panyx0718): add more imperative debug info. # TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype, return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype,
self.shape) self.shape)
...@@ -548,42 +548,42 @@ class Variable(object): ...@@ -548,42 +548,42 @@ class Variable(object):
@property @property
def _stop_gradient(self): def _stop_gradient(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.stop_gradient return self._ivar.stop_gradient
else: else:
return self.stop_gradient return self.stop_gradient
@_stop_gradient.setter @_stop_gradient.setter
def _stop_gradient(self, s): def _stop_gradient(self, s):
if _in_imperative_mode(): if _in_dygraph_mode():
self._ivar.stop_gradient = s self._ivar.stop_gradient = s
else: else:
self.stop_gradient = s self.stop_gradient = s
@property @property
def persistable(self): def persistable(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
return self.desc.persistable() return self.desc.persistable()
@persistable.setter @persistable.setter
def persistable(self, p): def persistable(self, p):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
self.desc.set_persistable(p) self.desc.set_persistable(p)
@property @property
def name(self): def name(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.name return self._ivar.name
else: else:
return cpt.to_text(self.desc.name()) return cpt.to_text(self.desc.name())
@name.setter @name.setter
def name(self, new_name): def name(self, new_name):
if _in_imperative_mode(): if _in_dygraph_mode():
self._ivar.name = new_name self._ivar.name = new_name
else: else:
self.desc.set_name(new_name) self.desc.set_name(new_name)
...@@ -591,26 +591,26 @@ class Variable(object): ...@@ -591,26 +591,26 @@ class Variable(object):
@property @property
def shape(self): def shape(self):
# convert to tuple, make it as same as numpy API. # convert to tuple, make it as same as numpy API.
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.shape return self._ivar.shape
else: else:
return tuple(self.desc.shape()) return tuple(self.desc.shape())
@property @property
def dtype(self): def dtype(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.dtype() return self.desc.dtype()
@property @property
def lod_level(self): def lod_level(self):
# TODO(minqiyang): Support lod_level in imperative mode # TODO(minqiyang): Support lod_level in dygraph mode
return self.desc.lod_level() return self.desc.lod_level()
@property @property
def type(self): def type(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.type() return self.desc.type()
...@@ -789,13 +789,24 @@ class Variable(object): ...@@ -789,13 +789,24 @@ class Variable(object):
if isinstance(item, tuple): if isinstance(item, tuple):
if len(item) > len(self.shape): if len(item) > len(self.shape):
raise IndexError("Too many indexes") raise IndexError("Too many indexes")
fixedSize = True
for i in range(len(self.shape)):
if self.shape[i] == -1:
fixedSize = False
break
newitem = self._reconstructSliceinfo(item) or item newitem = self._reconstructSliceinfo(item) or item
check, info = self._detectContinuesSlice(newitem) if fixedSize:
if check: check, info = self._detectContinuesSlice(newitem)
starts = info[0] if check:
ends = info[1] starts = info[0]
axes = [i for i in range(len(starts))] ends = info[1]
return self._sliceVar(axes, starts, ends) axes = [i for i in range(len(starts))]
return self._sliceVar(axes, starts, ends)
else:
new_var = self
for index, o in enumerate(newitem):
new_var = new_var._sliceAndConcatVar(o, index)
else: else:
new_var = self new_var = self
for index, o in enumerate(newitem): for index, o in enumerate(newitem):
...@@ -918,7 +929,7 @@ class Operator(object): ...@@ -918,7 +929,7 @@ class Operator(object):
inputs=None, inputs=None,
outputs=None, outputs=None,
attrs=None): attrs=None):
if _in_imperative_mode(): if _in_dygraph_mode():
if type is None: if type is None:
raise ValueError( raise ValueError(
"`type` to initialized an Operator can not be None.") "`type` to initialized an Operator can not be None.")
...@@ -1037,7 +1048,7 @@ class Operator(object): ...@@ -1037,7 +1048,7 @@ class Operator(object):
for arg in out_args: for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name)) out_arg_names.append(cpt.to_text(arg.name))
# TODO(minqiyang): could we remove variable's op in static mode? # TODO(minqiyang): could we remove variable's op in static mode?
if not _in_imperative_mode(): if not _in_dygraph_mode():
arg.op = self arg.op = self
self.desc.set_output(out_proto.name, out_arg_names) self.desc.set_output(out_proto.name, out_arg_names)
...@@ -1083,7 +1094,7 @@ class Operator(object): ...@@ -1083,7 +1094,7 @@ class Operator(object):
@property @property
def type(self): def type(self):
if _in_imperative_mode(): if _in_dygraph_mode():
return self.iop.type return self.iop.type
else: else:
return self.desc.type() return self.desc.type()
...@@ -1626,7 +1637,7 @@ class Block(object): ...@@ -1626,7 +1637,7 @@ class Block(object):
Returns: Returns:
Operator: the append Operator. Operator: the append Operator.
""" """
if _in_imperative_mode(): if _in_dygraph_mode():
op = Operator( op = Operator(
block=self, block=self,
desc=None, desc=None,
...@@ -1638,9 +1649,8 @@ class Block(object): ...@@ -1638,9 +1649,8 @@ class Block(object):
# record ops in tracer rather than blocks # record ops in tracer rather than blocks
# #
# TODO(minqiyang): add op stop_gradient support in static mode too. # TODO(minqiyang): add op stop_gradient support in static mode too.
# currently, we only support stop_gradient in imperative mode. # currently, we only support stop_gradient in dygraph mode.
_imperative_tracer().trace_op(op, _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False))
kwargs.get("stop_gradient", False))
else: else:
op_desc = self.desc.append_op() op_desc = self.desc.append_op()
op = Operator( op = Operator(
...@@ -1699,7 +1709,7 @@ class Block(object): ...@@ -1699,7 +1709,7 @@ class Block(object):
return self.ops[start:end] return self.ops[start:end]
def _prepend_op(self, *args, **kwargs): def _prepend_op(self, *args, **kwargs):
if _in_imperative_mode(): if _in_dygraph_mode():
op = Operator( op = Operator(
self, self,
None, None,
...@@ -1707,8 +1717,7 @@ class Block(object): ...@@ -1707,8 +1717,7 @@ class Block(object):
inputs=kwargs.get("inputs", None), inputs=kwargs.get("inputs", None),
outputs=kwargs.get("outputs", None), outputs=kwargs.get("outputs", None),
attrs=kwargs.get("attrs", None)) attrs=kwargs.get("attrs", None))
_imperative_tracer().trace_op(op, _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False))
kwargs.get("stop_gradient", False))
else: else:
op_desc = self.desc._prepend_op() op_desc = self.desc._prepend_op()
op = Operator( op = Operator(
...@@ -3511,22 +3520,22 @@ def _get_var(name, program=None): ...@@ -3511,22 +3520,22 @@ def _get_var(name, program=None):
@signature_safe_contextmanager @signature_safe_contextmanager
def _imperative_guard(tracer): def _dygraph_guard(tracer):
global _imperative_tracer_ global _dygraph_tracer_
tmp_trace = _imperative_tracer_ tmp_trace = _dygraph_tracer_
_imperative_tracer_ = tracer _dygraph_tracer_ = tracer
yield yield
_imperative_tracer_ = tmp_trace _dygraph_tracer_ = tmp_trace
@signature_safe_contextmanager @signature_safe_contextmanager
def _imperative_place_guard(place): def _dygraph_place_guard(place):
global _imperative_current_expected_place_ global _dygraph_current_expected_place_
tmp_place = _imperative_current_expected_place_ tmp_place = _dygraph_current_expected_place_
_imperative_current_expected_place_ = place _dygraph_current_expected_place_ = place
yield yield
_imperative_current_expected_place_ = tmp_place _dygraph_current_expected_place_ = tmp_place
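The renamed guards keep the same save/swap/restore pattern around a module-level global. A stripped-down, framework-free sketch of that pattern (generic names; the restore here is wrapped in try/finally for safety, whereas the guards above restore after the yield):

from contextlib import contextmanager

_current_tracer = None  # module-level global, analogous to _dygraph_tracer_

@contextmanager
def tracer_guard(tracer):
    # Save the old value, install the new one, restore on exit.
    global _current_tracer
    previous = _current_tracer
    _current_tracer = tracer
    try:
        yield
    finally:
        _current_tracer = previous

with tracer_guard("my-tracer"):
    print(_current_tracer)  # my-tracer
print(_current_tracer)      # None again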
...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer): ...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu() 'force_cpu': self._force_cpu or force_init_on_cpu()
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer): ...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer): ...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer): ...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer): ...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer): ...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer): ...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape), 'shape': list(shape),
value_name: values value_name: values
}) })
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer): ...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values value_name: values
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_imperative_mode(): if not framework._in_dygraph_mode():
var.op = op var.op = op
return op return op
......
...@@ -17,7 +17,7 @@ from .param_attr import ParamAttr ...@@ -17,7 +17,7 @@ from .param_attr import ParamAttr
from .initializer import Constant from .initializer import Constant
from . import layers from . import layers
from . import backward from . import backward
from .imperative import Layer, nn from .dygraph import Layer, nn
from . import executor from . import executor
from . import core from . import core
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import six import six
from .framework import Parameter, dtype_is_floating, _in_imperative_mode from .framework import Parameter, dtype_is_floating, _in_dygraph_mode
from . import unique_name from . import unique_name
from paddle.fluid.initializer import Constant, Xavier from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr from .param_attr import ParamAttr
...@@ -30,9 +30,9 @@ class LayerHelper(LayerHelperBase): ...@@ -30,9 +30,9 @@ class LayerHelper(LayerHelperBase):
def __init__(self, layer_type, **kwargs): def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs self.kwargs = kwargs
name = self.kwargs.get('name', None) name = self.kwargs.get('name', None)
# TODO(panyx0718, minqiyang): imperative mode # TODO(panyx0718, minqiyang): dygraph mode
# can not use both `layer_type` and `name`. Deprecate LayerHelper # can not use both `layer_type` and `name`. Deprecate LayerHelper
# and write a Helper for imperative mode. # and write a Helper for dygraph mode.
if name is None: if name is None:
self.kwargs['name'] = unique_name.generate(layer_type) self.kwargs['name'] = unique_name.generate(layer_type)
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import numpy as np import numpy as np
from .framework import Variable, default_main_program, default_startup_program, _in_imperative_mode, _current_expected_place from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place
from . import unique_name from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
from . import core from . import core
...@@ -54,8 +54,8 @@ class LayerHelperBase(object): ...@@ -54,8 +54,8 @@ class LayerHelperBase(object):
Return Variable construct from value Return Variable construct from value
""" """
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
assert _in_imperative_mode( assert _in_dygraph_mode(
), "to_variable could only be called in imperative mode" ), "to_variable could only be called in dygraph mode"
if not block: if not block:
block = default_main_program().current_block() block = default_main_program().current_block()
...@@ -302,8 +302,8 @@ class LayerHelperBase(object): ...@@ -302,8 +302,8 @@ class LayerHelperBase(object):
param = self._create_weight_normalize(attr, shape, dtype) param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param) WeightNormParamAttr.params_with_weight_norm.append(param)
return param return param
if _in_imperative_mode(): if _in_dygraph_mode():
# In imperative mode, we want the returned parameter to be # In dygraph mode, we want the returned parameter to be
# initialized so that it can be used imperatively. # initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
dtype=dtype, dtype=dtype,
...@@ -370,7 +370,7 @@ class LayerHelperBase(object): ...@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use initializer: initializer to use
""" """
assert isinstance(var, Variable) assert isinstance(var, Variable)
if _in_imperative_mode(): if _in_dygraph_mode():
initializer(var, var.block) initializer(var, var.block)
else: else:
self.startup_program.global_block().create_var( self.startup_program.global_block().create_var(
......
...@@ -929,9 +929,9 @@ def array_read(array, i): ...@@ -929,9 +929,9 @@ def array_read(array, i):
Examples: Examples:
.. code-block:: python .. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32') array = fluid.layers.create_array(dtype='float32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_read(tmp, i=i) item = fluid.layers.array_read(array, i)
""" """
helper = LayerHelper('array_read', **locals()) helper = LayerHelper('array_read', **locals())
if not isinstance( if not isinstance(
......
...@@ -22,18 +22,21 @@ strategy according to this module. ...@@ -22,18 +22,21 @@ strategy according to this module.
from __future__ import print_function from __future__ import print_function
import math
from . import control_flow from . import control_flow
from . import nn from . import nn
from . import ops from . import ops
from . import tensor from . import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter, unique_name, name_scope from ..framework import default_main_program, Parameter, unique_name, name_scope
import math from ..dygraph import base as imperative_base
from ..dygraph import learning_rate_scheduler as imperate_lr
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS', 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay' 'cosine_decay', 'linear_lr_warmup'
] ]
...@@ -66,13 +69,17 @@ def noam_decay(d_model, warmup_steps): ...@@ -66,13 +69,17 @@ def noam_decay(d_model, warmup_steps):
The decayed learning rate. The decayed learning rate.
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter(1) if imperative_base.enabled():
decay = imperate_lr.NoamDecay(d_model, warmup_steps)
return decay
else:
global_step = _decay_step_counter(1)
a = global_step**-0.5 a = global_step**-0.5
b = (warmup_steps**-1.5) * global_step b = (warmup_steps**-1.5) * global_step
lr_value = (d_model**-0.5) * nn.elementwise_min(a, b) lr_value = (d_model**-0.5) * nn.elementwise_min(a, b)
return lr_value return lr_value
def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...@@ -112,14 +119,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -112,14 +119,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.ExponentialDecay(learning_rate, decay_steps,
decay_rate, staircase)
return decay
else:
global_step = _decay_step_counter()
div_res = global_step / decay_steps div_res = global_step / decay_steps
if staircase: if staircase:
div_res = ops.floor(div_res) div_res = ops.floor(div_res)
decayed_lr = learning_rate * (decay_rate**div_res) decayed_lr = learning_rate * (decay_rate**div_res)
return decayed_lr return decayed_lr
def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...@@ -141,14 +153,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -141,14 +153,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
The decayed learning rate The decayed learning rate
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.NaturalExpDecay(learning_rate, decay_steps,
decay_rate, staircase)
return decay
else:
global_step = _decay_step_counter()
div_res = global_step / decay_steps div_res = global_step / decay_steps
if staircase: if staircase:
div_res = ops.floor(div_res) div_res = ops.floor(div_res)
decayed_lr = learning_rate * ops.exp(-1 * decay_rate * div_res) decayed_lr = learning_rate * ops.exp(-1 * decay_rate * div_res)
return decayed_lr return decayed_lr
def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...@@ -187,15 +204,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): ...@@ -187,15 +204,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.InverseTimeDecay(learning_rate, decay_steps,
decay_rate, staircase)
return decay
else:
global_step = _decay_step_counter()
div_res = global_step / decay_steps div_res = global_step / decay_steps
if staircase: if staircase:
div_res = ops.floor(div_res) div_res = ops.floor(div_res)
decayed_lr = learning_rate / (1 + decay_rate * div_res) decayed_lr = learning_rate / (1 + decay_rate * div_res)
return decayed_lr return decayed_lr
def polynomial_decay(learning_rate, def polynomial_decay(learning_rate,
...@@ -227,27 +249,33 @@ def polynomial_decay(learning_rate, ...@@ -227,27 +249,33 @@ def polynomial_decay(learning_rate,
Variable: The decayed learning rate Variable: The decayed learning rate
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.PolynomialDecay(learning_rate, decay_steps,
if cycle: end_learning_rate, power, cycle)
div_res = ops.ceil(global_step / decay_steps) return decay
zero_var = tensor.fill_constant(
shape=[1], dtype='float32', value=0.0)
one_var = tensor.fill_constant(
shape=[1], dtype='float32', value=1.0)
with control_flow.Switch() as switch:
with switch.case(global_step == zero_var):
tensor.assign(input=one_var, output=div_res)
decay_steps = decay_steps * div_res
else: else:
decay_steps_var = tensor.fill_constant( global_step = _decay_step_counter()
shape=[1], dtype='float32', value=float(decay_steps))
global_step = nn.elementwise_min(x=global_step, y=decay_steps_var) if cycle:
div_res = ops.ceil(global_step / decay_steps)
zero_var = tensor.fill_constant(
shape=[1], dtype='float32', value=0.0)
one_var = tensor.fill_constant(
shape=[1], dtype='float32', value=1.0)
with control_flow.Switch() as switch:
with switch.case(global_step == zero_var):
tensor.assign(input=one_var, output=div_res)
decay_steps = decay_steps * div_res
else:
decay_steps_var = tensor.fill_constant(
shape=[1], dtype='float32', value=float(decay_steps))
global_step = nn.elementwise_min(
x=global_step, y=decay_steps_var)
decayed_lr = (learning_rate - end_learning_rate) * \ decayed_lr = (learning_rate - end_learning_rate) * \
((1 - global_step / decay_steps) ** power) + end_learning_rate ((1 - global_step / decay_steps) ** power) + end_learning_rate
return decayed_lr return decayed_lr
def piecewise_decay(boundaries, values): def piecewise_decay(boundaries, values):
...@@ -279,34 +307,38 @@ def piecewise_decay(boundaries, values): ...@@ -279,34 +307,38 @@ def piecewise_decay(boundaries, values):
if len(values) - len(boundaries) != 1: if len(values) - len(boundaries) != 1:
raise ValueError("len(values) - len(boundaries) should be 1") raise ValueError("len(values) - len(boundaries) should be 1")
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.PiecewiseDecay(boundaries, values, 0)
return decay
else:
global_step = _decay_step_counter()
lr = tensor.create_global_var( lr = tensor.create_global_var(
shape=[1], shape=[1],
value=0.0, value=0.0,
dtype='float32', dtype='float32',
persistable=True, persistable=True,
name="learning_rate") name="learning_rate")
with control_flow.Switch() as switch: with control_flow.Switch() as switch:
for i in range(len(boundaries)): for i in range(len(boundaries)):
boundary_val = tensor.fill_constant( boundary_val = tensor.fill_constant(
shape=[1],
dtype='float32',
value=float(boundaries[i]),
force_cpu=True)
value_var = tensor.fill_constant(
shape=[1], dtype='float32', value=float(values[i]))
with switch.case(global_step < boundary_val):
tensor.assign(value_var, lr)
last_value_var = tensor.fill_constant(
shape=[1], shape=[1],
dtype='float32', dtype='float32',
value=float(boundaries[i]), value=float(values[len(values) - 1]))
force_cpu=True) with switch.default():
value_var = tensor.fill_constant( tensor.assign(last_value_var, lr)
shape=[1], dtype='float32', value=float(values[i]))
with switch.case(global_step < boundary_val):
tensor.assign(value_var, lr)
last_value_var = tensor.fill_constant(
shape=[1],
dtype='float32',
value=float(values[len(values) - 1]))
with switch.default():
tensor.assign(last_value_var, lr)
return lr return lr
def cosine_decay(learning_rate, step_each_epoch, epochs): def cosine_decay(learning_rate, step_each_epoch, epochs):
...@@ -336,12 +368,17 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): ...@@ -336,12 +368,17 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
learning_rate = base_lr, step_each_epoch=10000, epochs=120) learning_rate = base_lr, step_each_epoch=10000, epochs=120)
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter() if imperative_base.enabled():
decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch,
epochs)
return decay
else:
global_step = _decay_step_counter()
cur_epoch = ops.floor(global_step / step_each_epoch) cur_epoch = ops.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * 0.5 * ( decayed_lr = learning_rate * 0.5 * (
ops.cos(cur_epoch * math.pi / epochs) + 1) ops.cos(cur_epoch * math.pi / epochs) + 1)
return decayed_lr return decayed_lr
def append_LARS(params_grads, learning_rate, weight_decay): def append_LARS(params_grads, learning_rate, weight_decay):
...@@ -363,6 +400,9 @@ def append_LARS(params_grads, learning_rate, weight_decay): ...@@ -363,6 +400,9 @@ def append_LARS(params_grads, learning_rate, weight_decay):
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
""" """
assert not imperative_base.enabled(
), "append_LARS is NOT supported in dygraph mode now"
def _balanced_weight(param_norm, grad_norm): def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0: if weight_decay == 1.0:
return grad_norm + param_norm return grad_norm + param_norm
...@@ -383,3 +423,59 @@ def append_LARS(params_grads, learning_rate, weight_decay): ...@@ -383,3 +423,59 @@ def append_LARS(params_grads, learning_rate, weight_decay):
/ _balanced_weight(param_norm, grad_norm) / _balanced_weight(param_norm, grad_norm)
# set back param local learning rate # set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr param.optimize_attr['learning_rate'] = decayed_lr
def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
"""
Applies linear learning rate warmup before the normal learning rate
scheduling.
.. code-block:: python
if global_step < warmup_steps:
linear_step = end_lr - start_lr
lr = start_lr + linear_step * (global_step / warmup_steps)
Args:
learning_rate (float | Variable): A float value or Variable.
warmup_steps (int): The warmup steps.
start_lr (float): The start learning rate of warmup.
end_lr (float): The end learning rate of warmup.
Returns:
The decayed learning rate in warmup period.
Examples:
.. code-block:: python
boundaries = [100, 200]
lr_steps = [0.1, 0.01, 0.001]
warmup_steps = 50
start_lr = 1. / 3.
end_lr = 0.1
decayed_lr = fluid.layers.linear_lr_warmup(
fluid.layers.piecewise_decay(boundaries, lr_steps),
warmup_steps, start_lr, end_lr)
"""
assert (isinstance(end_lr, float))
assert (isinstance(start_lr, float))
linear_step = end_lr - start_lr
with default_main_program()._lr_schedule_guard():
lr = tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate_warmup")
global_step = _decay_step_counter()
with control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
decayed_lr = start_lr + linear_step * (global_step /
float(warmup_steps))
tensor.assign(decayed_lr, lr)
with switch.default():
tensor.assign(learning_rate, lr)
return lr
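A pure-Python sketch of the schedule linear_lr_warmup builds (illustrative names), combined with the piecewise decay from the docstring example: ramp linearly from start_lr to end_lr over warmup_steps, then hand over to the base schedule.

def piecewise(step, boundaries, values):
    for b, v in zip(boundaries, values):
        if step < b:
            return v
    return values[-1]

def linear_warmup(step, warmup_steps, start_lr, end_lr, after_warmup_lr):
    # Linear ramp during warmup, base schedule afterwards.
    if step < warmup_steps:
        return start_lr + (end_lr - start_lr) * (step / float(warmup_steps))
    return after_warmup_lr

boundaries, lr_steps = [100, 200], [0.1, 0.01, 0.001]
for s in (0, 25, 49, 50, 150, 250):
    print(s, linear_warmup(s, 50, 1. / 3., 0.1, piecewise(s, boundaries, lr_steps)))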
...@@ -23,8 +23,8 @@ import os ...@@ -23,8 +23,8 @@ import os
import inspect import inspect
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, _in_imperative_mode from ..framework import Variable, OpProtoHolder, _in_dygraph_mode
from ..imperative import base from ..dygraph import base
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
from .tensor import concat, assign from .tensor import concat, assign
...@@ -32,7 +32,7 @@ from . import utils ...@@ -32,7 +32,7 @@ from . import utils
from .. import unique_name from .. import unique_name
from functools import reduce from functools import reduce
from .. import core from .. import core
from ..imperative import layers from ..dygraph import layers
__all__ = [ __all__ = [
'fc', 'fc',
...@@ -183,10 +183,12 @@ __all__ = [ ...@@ -183,10 +183,12 @@ __all__ = [
'get_tensor_from_selected_rows', 'get_tensor_from_selected_rows',
'lstm', 'lstm',
'shuffle_channel', 'shuffle_channel',
'temporal_shift',
'py_func', 'py_func',
'psroi_pool', 'psroi_pool',
'teacher_student_sigmoid_loss', 'teacher_student_sigmoid_loss',
'huber_loss', 'huber_loss',
'kldiv_loss',
'tree_conv', 'tree_conv',
'npair_loss', 'npair_loss',
'fsp_matrix', 'fsp_matrix',
...@@ -296,7 +298,6 @@ def fc(input, ...@@ -296,7 +298,6 @@ def fc(input,
data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32") data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32")
fc = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh") fc = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh")
""" """
helper = LayerHelper("fc", **locals()) helper = LayerHelper("fc", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -1820,17 +1821,18 @@ def sequence_softmax(input, use_cudnn=False, name=None): ...@@ -1820,17 +1821,18 @@ def sequence_softmax(input, use_cudnn=False, name=None):
return softmax_out return softmax_out
def softmax(input, use_cudnn=False, name=None): def softmax(input, use_cudnn=False, name=None, axis=-1):
""" """
The input of the softmax operator is a tensor of any rank. The output tensor The input of the softmax operator is a tensor of any rank. The output tensor
has the same shape as the input. has the same shape as the input.
The input tensor will first be logically flattened to a 2-D matrix. The matrix's The dimension :attr:`axis` of the input tensor will be permuted to the last.
second dimension(row length) is as same as the last dimension of the input Then the input tensor will be logically flattened to a 2-D matrix. The matrix's
second dimension(row length) is the same as the dimension :attr:`axis` of the input
tensor, and the first dimension(column length) is the product of all other tensor, and the first dimension(column length) is the product of all other
dimensions of the input tensor. For each row of the matrix, the softmax operator dimensions of the input tensor. For each row of the matrix, the softmax operator
squashes the K-dimensional(K is the width of the matrix, which is also the size squashes the K-dimensional(K is the width of the matrix, which is also the size
of the input tensor's last dimension) vector of arbitrary real values to a of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a
K-dimensional vector of real values in the range [0, 1] that add up to 1. K-dimensional vector of real values in the range [0, 1] that add up to 1.
It computes the exponential of the given dimension and the sum of exponential It computes the exponential of the given dimension and the sum of exponential
...@@ -1852,6 +1854,9 @@ def softmax(input, use_cudnn=False, name=None): ...@@ -1852,6 +1854,9 @@ def softmax(input, use_cudnn=False, name=None):
False by default. Default: False False by default. Default: False
name (str|None): A name for this layer(optional). If set None, the layer name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None. will be named automatically. Default: None.
axis (int): The index of dimension to perform softmax calculations, it should
be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of
input variable. Default: -1.
Returns: Returns:
Variable: output of softmax Variable: output of softmax
...@@ -1861,7 +1866,10 @@ def softmax(input, use_cudnn=False, name=None): ...@@ -1861,7 +1866,10 @@ def softmax(input, use_cudnn=False, name=None):
.. code-block:: python .. code-block:: python
fc = fluid.layers.fc(input=x, size=10) fc = fluid.layers.fc(input=x, size=10)
softmax = fluid.layers.softmax(input=fc) # perform softmax in the second dimension
softmax = fluid.layers.softmax(input=fc, axis=1)
# perform softmax in the last dimension
softmax = fluid.layers.softmax(input=fc, axis=-1)
""" """
helper = LayerHelper('softmax', **locals()) helper = LayerHelper('softmax', **locals())
...@@ -1871,7 +1879,8 @@ def softmax(input, use_cudnn=False, name=None): ...@@ -1871,7 +1879,8 @@ def softmax(input, use_cudnn=False, name=None):
type="softmax", type="softmax",
inputs={"X": input}, inputs={"X": input},
outputs={"Out": softmax_out}, outputs={"Out": softmax_out},
attrs={"use_cudnn": use_cudnn}) attrs={"axis": axis,
"use_cudnn": use_cudnn})
return softmax_out return softmax_out
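To illustrate the new axis argument, a NumPy version of the normalize-along-one-dimension semantics described in the updated docstring (illustrative only, not the operator implementation):

import numpy as np

def softmax_along_axis(x, axis=-1):
    # Shift for numerical stability, then normalize along the chosen axis.
    shifted = x - x.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.randn(2, 3, 4)
print(softmax_along_axis(x, axis=1).sum(axis=1))    # each slice sums to 1 along axis 1
print(softmax_along_axis(x, axis=-1).sum(axis=-1))  # last-axis behaviour, the default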
...@@ -3279,6 +3288,8 @@ def layer_norm(input, ...@@ -3279,6 +3288,8 @@ def layer_norm(input,
>>> dtype='float32') >>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
""" """
assert _in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!"
helper = LayerHelper('layer_norm', **locals()) helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -5866,11 +5877,49 @@ def multiplex(inputs, index): ...@@ -5866,11 +5877,49 @@ def multiplex(inputs, index):
""" """
${comment} ${comment}
>>> import paddle.fluid as fluid For Example:
>>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
>>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') .. code-block:: text
>>> index = fluid.layers.data(name='index', shape=[1], dtype='int32')
>>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index) case 1:
Given:
X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]],
[[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]],
[[2,0,3,4], [2,1,7,8], [2,2,4,2], [2,3,3,4]],
[[3,0,3,4], [3,1,7,8], [3,2,4,2], [3,3,3,4]]]
index = [3,0,1,2]
out:[[3 0 3 4] // X[3,0] (3 = index[i], 0 = i); i=0
[0 1 3 4] // X[0,1] (0 = index[i], 1 = i); i=1
[1 2 4 2] // X[1,2] (1 = index[i], 2 = i); i=2
[2 3 3 4]] // X[2,3] (2 = index[i], 3 = i); i=3
case 2:
Given:
X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]],
[[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]]]
index = [1,0]
out:[[1 0 3 4] // X[1,0] (1 = index[0], 0 = i); i=0
[0 1 3 4] // X[0,1] (0 = index[1], 1 = i); i=1
[0 2 4 4] // X[0,2] (0 = 0, 2 = i); i=2
[0 3 3 4]] // X[0,3] (0 = 0, 3 = i); i=3
Examples:
.. code-block:: python
import paddle.fluid as fluid
x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
index = fluid.layers.data(name='index', shape=[1], dtype='int32')
out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
Args: Args:
inputs (list): ${x_comment}. inputs (list): ${x_comment}.
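Editorial note: the selection rule illustrated in case 1 above can be checked with a short NumPy sketch (not part of this patch); output row ``i`` is row ``i`` of the input chosen by ``index[i]``.

.. code-block:: python

    import numpy as np

    # X stacks the candidate inputs: X[k] is the k-th input tensor.
    X = np.array([[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]],
                  [[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]],
                  [[2,0,3,4], [2,1,7,8], [2,2,4,2], [2,3,3,4]],
                  [[3,0,3,4], [3,1,7,8], [3,2,4,2], [3,3,3,4]]])
    index = np.array([3, 0, 1, 2])

    # row i of the output comes from row i of input index[i]
    out = np.stack([X[index[i], i] for i in range(len(index))])
    print(out)  # [[3 0 3 4], [0 1 3 4], [1 2 4 2], [2 3 3 4]]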
...@@ -6405,8 +6454,8 @@ def squeeze(input, axes, name=None): ...@@ -6405,8 +6454,8 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10]) x = layers.data(name='x', shape=[5, 1, 10])
y = layers.squeeze(input=x, axes=[1]) y = layers.squeeze(input=x, axes=[1])
""" """
assert not _in_imperative_mode(), ( assert not _in_dygraph_mode(), (
"squeeze layer is not supported in imperative mode yet.") "squeeze layer is not supported in dygraph mode yet.")
helper = LayerHelper("squeeze", **locals()) helper = LayerHelper("squeeze", **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype) out = helper.create_variable_for_type_inference(dtype=input.dtype)
x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype)
...@@ -9144,7 +9193,7 @@ def _elementwise_op(helper): ...@@ -9144,7 +9193,7 @@ def _elementwise_op(helper):
op_type = helper.layer_type op_type = helper.layer_type
x = helper.kwargs.get('x', None) x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None) y = helper.kwargs.get('y', None)
if _in_imperative_mode(): if _in_dygraph_mode():
x = base.to_variable(x) x = base.to_variable(x)
y = base.to_variable(y) y = base.to_variable(y)
...@@ -10352,6 +10401,48 @@ def shuffle_channel(x, group, name=None): ...@@ -10352,6 +10401,48 @@ def shuffle_channel(x, group, name=None):
return out return out
@templatedoc()
def temporal_shift(x, seg_num, shift_ratio=0.25, name=None):
"""
**Temporal Shift Operator**
${comment}
Args:
x(Variable): ${x_comment}
seg_num(int): ${seg_num_comment}
shift_ratio(float): ${shift_ratio_comment}
name (str, default None): The name of this layer.
Returns:
out(Variable): The temporal shifting result is a tensor variable with the
same shape and same type as the input.
Raises:
TypeError: seg_num must be int type.
Examples:
.. code-block:: python
input = fluid.layers.data(name='input', shape=[4,2,2], dtype='float32')
out = fluid.layers.temporal_shift(x=input, seg_num=2, shift_ratio=0.2)
"""
helper = LayerHelper("temporal_shift", **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
if not isinstance(seg_num, int):
raise TypeError("seg_num must be int type.")
helper.append_op(
type="temporal_shift",
inputs={"X": x},
outputs={"Out": out},
attrs={"seg_num": seg_num,
"shift_ratio": shift_ratio})
return out
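Editorial note: a hedged NumPy sketch of the TSM-style shift this op is understood to perform (zero-padded shifts of two channel slices along the temporal dimension); it is an illustration only and may differ in detail from the actual kernel.

.. code-block:: python

    import numpy as np

    def temporal_shift_ref(x, seg_num, shift_ratio=0.25):
        # x: [N*T, C, H, W] with T == seg_num
        nt, c, h, w = x.shape
        n, t = nt // seg_num, seg_num
        x = x.reshape(n, t, c, h, w)
        fold = int(c * shift_ratio)
        out = np.zeros_like(x)
        out[:, :-1, :fold] = x[:, 1:, :fold]                  # first slice: shift one step in time
        out[:, 1:, fold:2 * fold] = x[:, :-1, fold:2 * fold]  # second slice: shift the other way
        out[:, :, 2 * fold:] = x[:, :, 2 * fold:]             # remaining channels unchanged
        return out.reshape(nt, c, h, w)

    y = temporal_shift_ref(np.random.rand(4, 8, 2, 2).astype('float32'), seg_num=2)
    print(y.shape)  # (4, 8, 2, 2)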
class PyFuncRegistry(object): class PyFuncRegistry(object):
_register_funcs = [] _register_funcs = []
...@@ -10672,6 +10763,38 @@ def huber_loss(input, label, delta): ...@@ -10672,6 +10763,38 @@ def huber_loss(input, label, delta):
return out return out
@templatedoc()
def kldiv_loss(x, target, reduction='mean', name=None):
"""
${comment}
Args:
x (Variable): ${x_comment}
target (Variable): ${target_comment}
reduction (str): ${reduction_comment}
name (str, default None): The name of this layer.
Returns:
kldiv\_loss (Variable): The KL divergence loss.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[4,2,2], dtype='float32')
target = fluid.layers.data(name='target', shape=[4,2,2], dtype='float32')
loss = fluid.layers.kldiv_loss(x=x, target=target, reduction='batchmean')
"""
helper = LayerHelper('kldiv_loss', **locals())
loss = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type='kldiv_loss',
inputs={'X': x,
'Target': target},
outputs={'Loss': loss},
attrs={'reduction': reduction})
return loss
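Editorial note: a hedged NumPy sketch of the KL-divergence loss this wrapper exposes, assuming ``x`` holds log-probabilities and ``target`` holds probabilities; the reduction names follow the usual 'none'/'mean'/'sum'/'batchmean' convention and are an assumption here, not read from the kernel.

.. code-block:: python

    import numpy as np

    def kldiv_loss_ref(x, target, reduction='mean'):
        # elementwise term: target * (log(target) - x); the where() guard treats 0 * log(0) as 0
        loss = np.where(target > 0, target * (np.log(target) - x), 0.0)
        if reduction == 'none':
            return loss
        if reduction == 'sum':
            return loss.sum()
        if reduction == 'batchmean':
            return loss.sum() / loss.shape[0]
        return loss.mean()  # 'mean'

    x = np.log(np.full((4, 3), 1.0 / 3.0))   # uniform log-probabilities
    target = np.full((4, 3), 1.0 / 3.0)      # identical target distribution
    print(kldiv_loss_ref(x, target, 'batchmean'))  # ~0.0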
@templatedoc() @templatedoc()
def tree_conv(nodes_vector, def tree_conv(nodes_vector,
edge_set, edge_set,
......
...@@ -20,7 +20,6 @@ from ..framework import convert_np_dtype_to_dtype_ ...@@ -20,7 +20,6 @@ from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc from ..core import VarDesc
from ..imperative import base as imperative_base
from .layer_function_generator import templatedoc from .layer_function_generator import templatedoc
import numpy import numpy
......
...@@ -30,7 +30,8 @@ from .initializer import Constant ...@@ -30,7 +30,8 @@ from .initializer import Constant
from .layer_helper import LayerHelper from .layer_helper import LayerHelper
from .layers import ops from .layers import ops
from .regularizer import append_regularization_ops from .regularizer import append_regularization_ops
from .imperative import base as imperative_base from .dygraph import base as imperative_base
from .dygraph.learning_rate_scheduler import LearningRateDecay
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.layers import tensor from paddle.fluid.layers import tensor
from functools import reduce from functools import reduce
...@@ -54,9 +55,19 @@ class Optimizer(object): ...@@ -54,9 +55,19 @@ class Optimizer(object):
""" """
def __init__(self, learning_rate, regularization=None, name=None): def __init__(self, learning_rate, regularization=None, name=None):
if not isinstance(learning_rate, float) and \ if framework._in_dygraph_mode():
not isinstance(learning_rate, framework.Variable): if not isinstance(learning_rate, float) and \
raise TypeError("learning rate should be float or Variable") not isinstance(learning_rate, LearningRateDecay):
raise TypeError(
"learning rate should be float or LearningRateDecay, got %s here"
% type(learning_rate))
else:
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
raise TypeError(
"learning rate should be float or Variable, got %s here" %
type(learning_rate))
self._name = name self._name = name
self.regularization = regularization self.regularization = regularization
self._learning_rate = learning_rate self._learning_rate = learning_rate
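Editorial note: with the check above, the accepted learning-rate types now depend on the execution mode; a minimal usage sketch under that assumption (static graph takes a float or an LR Variable, dygraph takes a float or a LearningRateDecay object).

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer

    # static graph mode: a plain float learning rate is accepted
    sgd_static = SGDOptimizer(learning_rate=1e-3)

    # dygraph mode: a float (or a LearningRateDecay instance) is accepted
    with fluid.dygraph.guard():
        sgd_dygraph = SGDOptimizer(learning_rate=1e-3)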
...@@ -80,24 +91,49 @@ class Optimizer(object): ...@@ -80,24 +91,49 @@ class Optimizer(object):
return self._opti_name_list return self._opti_name_list
def _create_global_learning_rate(self): def _create_global_learning_rate(self):
lr = self._global_learning_rate() if imperative_base.enabled():
# create learning rate Variable
if isinstance(self._learning_rate, float):
lr = self._global_learning_rate()
if isinstance(lr, framework.Variable): if isinstance(lr, framework.Variable):
return return
else: else:
if not isinstance(self._learning_rate, float): self._learning_rate_map[framework.default_main_program(
)] = layers.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(self._learning_rate),
dtype='float32' if self._dtype is None else self._dtype,
persistable=True)
# get learning rate Variable from LearningRateDecay
elif isinstance(self._learning_rate, LearningRateDecay):
self._learning_rate_map[framework.default_main_program(
)] = self._learning_rate()
else:
raise TypeError( raise TypeError(
"learning rate variable is create outside optimizer," "optimizer's learning rate must be float or LearningRateDecay"
"can not create new learning rate variable for new program") )
else:
lr = self._global_learning_rate()
# create learning rate in the current main program if isinstance(lr, framework.Variable):
self._learning_rate_map[framework.default_main_program( return
)] = layers.create_global_var( else:
name=unique_name.generate("learning_rate"), if not isinstance(self._learning_rate, float):
shape=[1], raise TypeError(
value=float(self._learning_rate), "learning rate variable is created outside optimizer,"
dtype='float32' if self._dtype is None else self._dtype, "can not create new learning rate variable for new program"
persistable=True) )
# create learning rate in the current main program
self._learning_rate_map[framework.default_main_program(
)] = layers.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(self._learning_rate),
dtype='float32' if self._dtype is None else self._dtype,
persistable=True)
def _global_learning_rate(self, program=None): def _global_learning_rate(self, program=None):
""" """
...@@ -169,7 +205,7 @@ class Optimizer(object): ...@@ -169,7 +205,7 @@ class Optimizer(object):
name = self._name + "_" + name name = self._name + "_" + name
if (name in self._accumulators and if (name in self._accumulators and
param.name in self._accumulators[name]): param.name in self._accumulators[name]):
if framework._in_imperative_mode(): if framework._in_dygraph_mode():
return self._accumulators[name][param.name] return self._accumulators[name][param.name]
raise Exception("Accumulator {} already exists for parameter {}". raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name)) format(name, param.name))
...@@ -326,12 +362,38 @@ class Optimizer(object): ...@@ -326,12 +362,38 @@ class Optimizer(object):
Examples: Examples:
See examples in `apply_gradients`. See examples in `apply_gradients`.
""" """
if callbacks is None: self._dtype = loss.dtype
callbacks = [error_clip_callback] if framework._in_dygraph_mode():
if parameter_list is not None:
parameters = parameter_list
else:
parameters = framework._dygraph_tracer().all_parameters()
params_grads = []
for param in parameters:
if not param.trainable:
continue
if param._ivar._grad_ivar() is not None:
# create gradient variable
grad_var = Variable(
block=loss.block,
name=param._ivar._grad_name(),
stop_gradient=True,
ivar=param._ivar._grad_ivar())
params_grads.append((param, grad_var))
else: else:
assert (isinstance(callbacks, list)) if callbacks is None:
callbacks.append(error_clip_callback) callbacks = [error_clip_callback]
return append_backward(loss, parameter_list, no_grad_set, callbacks) else:
assert (isinstance(callbacks, list))
program = loss.block.program
with program_guard(program, startup_program):
params_grads = append_backward(loss, parameter_list,
no_grad_set, callbacks)
# Note: since we can't use all_reduce_op now,
# dgc_op should be the last op of one grad.
self._append_dgc_ops(params_grads)
return params_grads
def apply_gradients(self, params_grads): def apply_gradients(self, params_grads):
""" """
...@@ -372,6 +434,30 @@ class Optimizer(object): ...@@ -372,6 +434,30 @@ class Optimizer(object):
return optimize_ops return optimize_ops
def apply_optimize(self, loss, startup_program, params_grads):
"""
Second part of `minimize`, appending optimization operators for
given `params_grads` pairs.
Args:
loss (Variable): loss variable to run optimizations.
startup_program (Program): startup_program for initializing parameters
in `parameter_list`.
params_grads (list): list of (param, grad) pair to do optimization.
Returns:
list: A list of operators appended to the current program.
"""
if framework._in_dygraph_mode():
with program_guard(framework.default_main_program(),
framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads)
else:
program = loss.block.program
with program_guard(program, startup_program):
optimize_ops = self.apply_gradients(params_grads)
return optimize_ops
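Editorial note: with ``backward`` and ``apply_optimize`` split out, ``minimize`` becomes a thin wrapper around the two; a hedged static-graph sketch of calling the halves explicitly (the small network below is a placeholder, not taken from this patch).

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer

    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    opt = SGDOptimizer(learning_rate=1e-3)
    # equivalent to opt.minimize(loss): collect (param, grad) pairs, then append update ops
    params_grads = opt.backward(loss)
    optimize_ops = opt.apply_optimize(loss, startup_program=None, params_grads=params_grads)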
def minimize(self, def minimize(self,
loss, loss,
startup_program=None, startup_program=None,
...@@ -394,38 +480,13 @@ class Optimizer(object): ...@@ -394,38 +480,13 @@ class Optimizer(object):
tuple: (optimize_ops, params_grads) which are, list of operators appended; tuple: (optimize_ops, params_grads) which are, list of operators appended;
and list of (param, grad) Variables pair for optimization. and list of (param, grad) Variables pair for optimization.
""" """
self._dtype = loss.dtype params_grads = self.backward(
optimize_ops = [] loss,
if framework._in_imperative_mode(): startup_program=startup_program,
if parameter_list is not None: parameter_list=parameter_list,
parameters = parameter_list no_grad_set=no_grad_set)
else: optimize_ops = self.apply_optimize(
parameters = framework._imperative_tracer().all_parameters() loss, startup_program=startup_program, params_grads=params_grads)
params_grads = []
for param in parameters:
if not param.trainable:
continue
if param._ivar._grad_ivar() is not None:
# create gradient variable
grad_var = Variable(
block=loss.block,
name=param._ivar._grad_name(),
stop_gradient=True,
ivar=param._ivar._grad_ivar())
params_grads.append((param, grad_var))
with program_guard(framework.default_main_program(),
framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads)
else:
program = loss.block.program
with program_guard(program, startup_program):
params_grads = self.backward(loss, startup_program,
parameter_list, no_grad_set)
# Note: since we can't use all_reduce_op now,
# dgc_op should be the last op of one grad.
self._append_dgc_ops(params_grads)
optimize_ops = self.apply_gradients(params_grads)
return optimize_ops, params_grads return optimize_ops, params_grads
...@@ -581,10 +642,10 @@ class DGCMomentumOptimizer(MomentumOptimizer): ...@@ -581,10 +642,10 @@ class DGCMomentumOptimizer(MomentumOptimizer):
DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication. DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication.
This optimizer will do two things: This optimizer will do two things:
1. Compress the gradient by getting the TopK important values from the tensor \ 1. Compress the gradient by getting the TopK important values from the tensor \
and use them for allreduce to reduce network bandwidth. and use them for allreduce to reduce network bandwidth.
2. Call momentum to optimize on the cost. 2. Call momentum to optimize on the cost.
Args: Args:
......
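Editorial note: a hedged NumPy sketch of the top-k gradient sparsification idea described in the DGC docstring above (keep only the largest-magnitude entries, accumulate the rest locally); it illustrates the concept only and is not the DGC kernel.

.. code-block:: python

    import numpy as np

    def topk_sparsify(grad, ratio=0.001):
        # keep the k largest-magnitude entries; the remainder stays in a local residual
        k = max(1, int(grad.size * ratio))
        flat = grad.ravel()
        idx = np.argpartition(np.abs(flat), -k)[-k:]
        sparse = np.zeros_like(flat)
        sparse[idx] = flat[idx]
        residual = flat - sparse
        return sparse.reshape(grad.shape), residual.reshape(grad.shape)

    g = np.random.randn(8, 8).astype('float32')
    sparse_g, local_residual = topk_sparsify(g, ratio=0.1)  # sparse_g would be allreduced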
...@@ -78,7 +78,7 @@ list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) ...@@ -78,7 +78,7 @@ list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) list(REMOVE_ITEM TEST_OPS test_imperative_mnist)
list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer)
foreach(TEST_OP ${TEST_OPS}) foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP}) py_test_modules(${TEST_OP} MODULES ${TEST_OP})
...@@ -89,7 +89,7 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL) ...@@ -89,7 +89,7 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL)
py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL) py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL)
py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
FLAGS_cudnn_deterministic=1) FLAGS_cudnn_deterministic=1)
py_test_modules(test_imperative_optimizer MODULES test_imperative_optimizer ENVS py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS
FLAGS_cudnn_deterministic=1) FLAGS_cudnn_deterministic=1)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
......
...@@ -262,14 +262,14 @@ class OpTest(unittest.TestCase): ...@@ -262,14 +262,14 @@ class OpTest(unittest.TestCase):
if isinstance(value, tuple): if isinstance(value, tuple):
data = value[0] data = value[0]
lod = value[1] lod = value[1]
v = fluid.imperative.base.to_variable(value=data) v = fluid.dygraph.base.to_variable(value=data)
v._ivar.value().get_tensor().set_recursive_sequence_lengths(lod) v._ivar.value().get_tensor().set_recursive_sequence_lengths(lod)
return v return v
else: else:
return fluid.imperative.base.to_variable(value) return fluid.dygraph.base.to_variable(value)
def _calc_imperative_output(self, place, parallel=False, no_check_set=None): def _calc_dygraph_output(self, place, parallel=False, no_check_set=None):
with fluid.imperative.base.guard(place=place): with fluid.dygraph.base.guard(place=place):
block = fluid.default_main_program().global_block() block = fluid.default_main_program().global_block()
# prepare input variable # prepare input variable
...@@ -316,7 +316,7 @@ class OpTest(unittest.TestCase): ...@@ -316,7 +316,7 @@ class OpTest(unittest.TestCase):
return outputs return outputs
def _calc_output(self, place, parallel=False, no_check_set=None): def _calc_output(self, place, parallel=False, no_check_set=None, loss=None):
program = Program() program = Program()
block = program.global_block() block = program.global_block()
self._append_ops(block) self._append_ops(block)
...@@ -329,8 +329,14 @@ class OpTest(unittest.TestCase): ...@@ -329,8 +329,14 @@ class OpTest(unittest.TestCase):
use_cuda = False use_cuda = False
if isinstance(place, fluid.CUDAPlace): if isinstance(place, fluid.CUDAPlace):
use_cuda = True use_cuda = True
executor = fluid.ParallelExecutor( if loss:
use_cuda=use_cuda, loss_name=loss.name, main_program=program) executor = fluid.ParallelExecutor(
use_cuda=use_cuda,
loss_name=loss.name,
main_program=program)
else:
executor = fluid.ParallelExecutor(
use_cuda=use_cuda, main_program=program)
else: else:
executor = Executor(place) executor = Executor(place)
...@@ -364,9 +370,9 @@ class OpTest(unittest.TestCase): ...@@ -364,9 +370,9 @@ class OpTest(unittest.TestCase):
atol, atol,
no_check_set=None, no_check_set=None,
equal_nan=False, equal_nan=False,
check_imperative=False): check_dygraph=False):
if check_imperative: if check_dygraph:
imperative_outs = self._calc_imperative_output( dygraph_outs = self._calc_dygraph_output(
place, no_check_set=no_check_set) place, no_check_set=no_check_set)
outs, fetch_list = self._calc_output(place, no_check_set=no_check_set) outs, fetch_list = self._calc_output(place, no_check_set=no_check_set)
...@@ -393,8 +399,8 @@ class OpTest(unittest.TestCase): ...@@ -393,8 +399,8 @@ class OpTest(unittest.TestCase):
type(sub_out)) type(sub_out))
for item in sub_out: for item in sub_out:
sub_out_name, expect = item[0], item[1] sub_out_name, expect = item[0], item[1]
if check_imperative: if check_dygraph:
imperative_actual = imperative_outs[sub_out_name][0] imperative_actual = dygraph_outs[sub_out_name][0]
imperative_actual_t = np.array( imperative_actual_t = np.array(
imperative_actual._ivar.value().get_tensor()) imperative_actual._ivar.value().get_tensor())
idx = find_actual(sub_out_name, fetch_list) idx = find_actual(sub_out_name, fetch_list)
...@@ -407,7 +413,7 @@ class OpTest(unittest.TestCase): ...@@ -407,7 +413,7 @@ class OpTest(unittest.TestCase):
actual_t, expect_t, atol=atol, equal_nan=equal_nan), actual_t, expect_t, atol=atol, equal_nan=equal_nan),
"Output (" + sub_out_name + ") has diff at " + "Output (" + sub_out_name + ") has diff at " +
str(place)) str(place))
if check_imperative: if check_dygraph:
self.assertTrue( self.assertTrue(
np.allclose( np.allclose(
imperative_actual_t, imperative_actual_t,
...@@ -415,21 +421,21 @@ class OpTest(unittest.TestCase): ...@@ -415,21 +421,21 @@ class OpTest(unittest.TestCase):
atol=atol, atol=atol,
equal_nan=equal_nan), equal_nan=equal_nan),
"Output (" + sub_out_name + ") has diff at " + "Output (" + sub_out_name + ") has diff at " +
str(place) + " in imperative mode") str(place) + " in dygraph mode")
if isinstance(expect, tuple): if isinstance(expect, tuple):
self.assertListEqual( self.assertListEqual(
actual.recursive_sequence_lengths(), expect[1], actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name + "Output (" + sub_out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
if check_imperative: if check_dygraph:
self.assertListEqual( self.assertListEqual(
imperative_actual._ivar.value().get_tensor() imperative_actual._ivar.value().get_tensor()
.recursive_sequence_lengths(), expect[1], .recursive_sequence_lengths(), expect[1],
"Output (" + out_name + ") has different lod at " + "Output (" + out_name + ") has different lod at " +
str(place) + " in imperative mode") str(place) + " in dygraph mode")
else: else:
if check_imperative: if check_dygraph:
imperative_actual = imperative_outs[out_name][0] imperative_actual = dygraph_outs[out_name][0]
imperative_actual_t = np.array( imperative_actual_t = np.array(
imperative_actual._ivar.value().get_tensor()) imperative_actual._ivar.value().get_tensor())
idx = find_actual(out_name, fetch_list) idx = find_actual(out_name, fetch_list)
...@@ -443,7 +449,7 @@ class OpTest(unittest.TestCase): ...@@ -443,7 +449,7 @@ class OpTest(unittest.TestCase):
"Output (" + out_name + ") has diff at " + str(place) + "Output (" + out_name + ") has diff at " + str(place) +
"\nExpect " + str(expect_t) + "\n" + "But Got" + "\nExpect " + str(expect_t) + "\n" + "But Got" +
str(actual_t) + " in class " + self.__class__.__name__) str(actual_t) + " in class " + self.__class__.__name__)
if check_imperative: if check_dygraph:
self.assertTrue( self.assertTrue(
np.allclose( np.allclose(
imperative_actual_t, imperative_actual_t,
...@@ -458,12 +464,12 @@ class OpTest(unittest.TestCase): ...@@ -458,12 +464,12 @@ class OpTest(unittest.TestCase):
self.assertListEqual(actual.recursive_sequence_lengths(), self.assertListEqual(actual.recursive_sequence_lengths(),
expect[1], "Output (" + out_name + expect[1], "Output (" + out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
if check_imperative: if check_dygraph:
self.assertListEqual( self.assertListEqual(
imperative_actual._ivar.value().get_tensor() imperative_actual._ivar.value().get_tensor()
.recursive_sequence_lengths(), expect[1], .recursive_sequence_lengths(), expect[1],
"Output (" + out_name + ") has different lod at " + "Output (" + out_name + ") has different lod at " +
str(place) + " in imperative mode") str(place) + " in dygraph mode")
def _get_places(self): def _get_places(self):
if self.dtype == np.float16: if self.dtype == np.float16:
...@@ -490,11 +496,11 @@ class OpTest(unittest.TestCase): ...@@ -490,11 +496,11 @@ class OpTest(unittest.TestCase):
atol=1e-5, atol=1e-5,
no_check_set=None, no_check_set=None,
equal_nan=False, equal_nan=False,
check_imperative=False): check_dygraph=False):
places = self._get_places() places = self._get_places()
for place in places: for place in places:
self.check_output_with_place(place, atol, no_check_set, equal_nan, self.check_output_with_place(place, atol, no_check_set, equal_nan,
check_imperative) check_dygraph)
def check_output_customized(self, checker): def check_output_customized(self, checker):
places = self._get_places() places = self._get_places()
......
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
class L1(fluid.imperative.Layer): class L1(fluid.dygraph.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L1, self).__init__(prefix) super(L1, self).__init__(prefix)
self._param_attr = fluid.ParamAttr( self._param_attr = fluid.ParamAttr(
...@@ -32,7 +32,7 @@ class L1(fluid.imperative.Layer): ...@@ -32,7 +32,7 @@ class L1(fluid.imperative.Layer):
return self.w1 + self.w2 return self.w1 + self.w2
class L2(fluid.imperative.Layer): class L2(fluid.dygraph.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L2, self).__init__(prefix) super(L2, self).__init__(prefix)
self.layer1 = L1(self.full_name()) self.layer1 = L1(self.full_name())
...@@ -42,7 +42,7 @@ class L2(fluid.imperative.Layer): ...@@ -42,7 +42,7 @@ class L2(fluid.imperative.Layer):
return self.layer1() + self.layer2() return self.layer1() + self.layer2()
class L3(fluid.imperative.Layer): class L3(fluid.dygraph.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L3, self).__init__(prefix) super(L3, self).__init__(prefix)
self.layer1 = L2(self.full_name()) self.layer1 = L2(self.full_name())
...@@ -54,7 +54,7 @@ class L3(fluid.imperative.Layer): ...@@ -54,7 +54,7 @@ class L3(fluid.imperative.Layer):
class TestBaseLayer(unittest.TestCase): class TestBaseLayer(unittest.TestCase):
def test_one_level(self): def test_one_level(self):
with fluid.imperative.guard(): with fluid.dygraph.guard():
l = L1('test_one_level') l = L1('test_one_level')
ret = l() ret = l()
self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
...@@ -62,7 +62,7 @@ class TestBaseLayer(unittest.TestCase): ...@@ -62,7 +62,7 @@ class TestBaseLayer(unittest.TestCase):
self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self): def test_three_level(self):
with fluid.imperative.guard(): with fluid.dygraph.guard():
l = L3('test_three_level') l = L3('test_three_level')
names = [p.name for p in l.parameters()] names = [p.name for p in l.parameters()]
ret = l() ret = l()
......
...@@ -156,7 +156,7 @@ class TestGRUOp(OpTest): ...@@ -156,7 +156,7 @@ class TestGRUOp(OpTest):
} }
def test_check_output(self): def test_check_output(self):
self.check_output(atol=1e-8, check_imperative=True) self.check_output(atol=1e-8, check_dygraph=True)
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden']) self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden'])
......
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.imperative.nn import FC from paddle.fluid.dygraph.nn import FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MyLayer(fluid.imperative.Layer): class MyLayer(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MyLayer, self).__init__(name_scope) super(MyLayer, self).__init__(name_scope)
...@@ -34,7 +34,7 @@ class MyLayer(fluid.imperative.Layer): ...@@ -34,7 +34,7 @@ class MyLayer(fluid.imperative.Layer):
return [x] return [x]
class MyPyLayer(fluid.imperative.PyLayer): class MyPyLayer(fluid.dygraph.PyLayer):
def __init__(self): def __init__(self):
super(MyPyLayer, self).__init__() super(MyPyLayer, self).__init__()
...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.imperative.PyLayer): ...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.imperative.PyLayer):
return np.array(dout) * (1 - np.square(np.array(out))) return np.array(dout) * (1 - np.square(np.array(out)))
class MLP(fluid.imperative.Layer): class MLP(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), self._fc1 = FC(self.full_name(),
...@@ -71,7 +71,7 @@ class MLP(fluid.imperative.Layer): ...@@ -71,7 +71,7 @@ class MLP(fluid.imperative.Layer):
return x return x
class SimpleRNNCell(fluid.imperative.Layer): class SimpleRNNCell(fluid.dygraph.Layer):
def __init__(self, name_scope, step_input_size, hidden_size, output_size, def __init__(self, name_scope, step_input_size, hidden_size, output_size,
param_attr): param_attr):
super(SimpleRNNCell, self).__init__(name_scope) super(SimpleRNNCell, self).__init__(name_scope)
...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.imperative.Layer): ...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.imperative.Layer):
return reduce_out, hidden return reduce_out, hidden
class SimpleRNN(fluid.imperative.Layer): class SimpleRNN(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(SimpleRNN, self).__init__(name_scope) super(SimpleRNN, self).__init__(name_scope)
self.seq_len = 4 self.seq_len = 4
...@@ -194,10 +194,10 @@ class SimpleRNN(fluid.imperative.Layer): ...@@ -194,10 +194,10 @@ class SimpleRNN(fluid.imperative.Layer):
class TestImperative(unittest.TestCase): class TestImperative(unittest.TestCase):
def test_sum_op(self): def test_sum_op(self):
x = np.ones([2, 2], np.float32) x = np.ones([2, 2], np.float32)
with fluid.imperative.guard(): with fluid.dygraph.guard():
inputs = [] inputs = []
for _ in range(10): for _ in range(10):
inputs.append(fluid.imperative.base.to_variable(x)) inputs.append(fluid.dygraph.base.to_variable(x))
ret = fluid.layers.sums(inputs) ret = fluid.layers.sums(inputs)
loss = fluid.layers.reduce_sum(ret) loss = fluid.layers.reduce_sum(ret)
loss._backward() loss._backward()
...@@ -205,17 +205,17 @@ class TestImperative(unittest.TestCase): ...@@ -205,17 +205,17 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(inputs[0]._gradient(), x)) self.assertTrue(np.allclose(inputs[0]._gradient(), x))
def test_layer(self): def test_layer(self):
with fluid.imperative.guard(): with fluid.dygraph.guard():
cl = core.Layer() cl = core.Layer()
cl.forward([]) cl.forward([])
l = fluid.imperative.Layer("l") l = fluid.dygraph.Layer("l")
self.assertRaises(NotImplementedError, l.forward, []) self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self): def test_pylayer_func_id(self):
with fluid.imperative.guard(): with fluid.dygraph.guard():
class PyLayer1(fluid.imperative.PyLayer): class PyLayer1(fluid.dygraph.PyLayer):
def __init__(self): def __init__(self):
super(PyLayer1, self).__init__() super(PyLayer1, self).__init__()
...@@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase): ...@@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase):
def backward(input): def backward(input):
return input return input
class PyLayer2(fluid.imperative.PyLayer): class PyLayer2(fluid.dygraph.PyLayer):
def __init__(self): def __init__(self):
super(PyLayer2, self).__init__() super(PyLayer2, self).__init__()
...@@ -241,21 +241,21 @@ class TestImperative(unittest.TestCase): ...@@ -241,21 +241,21 @@ class TestImperative(unittest.TestCase):
py_layer_1 = PyLayer1() py_layer_1 = PyLayer1()
py_layer_2 = PyLayer2() py_layer_2 = PyLayer2()
py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
py_layer_2(fluid.imperative.base.to_variable(np.ones([2, 2]))) py_layer_2(fluid.dygraph.base.to_variable(np.ones([2, 2])))
id = py_layer_1.forward_id id = py_layer_1.forward_id
self.assertGreater(id, 0) self.assertGreater(id, 0)
self.assertEqual(py_layer_1.backward_id, id + 1) self.assertEqual(py_layer_1.backward_id, id + 1)
self.assertEqual(py_layer_2.forward_id, id + 2) self.assertEqual(py_layer_2.forward_id, id + 2)
self.assertEqual(py_layer_2.backward_id, id + 3) self.assertEqual(py_layer_2.backward_id, id + 3)
py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
self.assertEqual(py_layer_1.forward_id, id) self.assertEqual(py_layer_1.forward_id, id)
def test_pylayer(self): def test_pylayer(self):
np_inp = np.ones([2, 2], np.float32) np_inp = np.ones([2, 2], np.float32)
with fluid.imperative.guard(): with fluid.dygraph.guard():
my_py_layer = MyPyLayer() my_py_layer = MyPyLayer()
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
outs = my_py_layer(var_inp) outs = my_py_layer(var_inp)
dy_out = np.sum(outs[0]._numpy()) dy_out = np.sum(outs[0]._numpy())
outs[0]._backward() outs[0]._backward()
...@@ -282,8 +282,8 @@ class TestImperative(unittest.TestCase): ...@@ -282,8 +282,8 @@ class TestImperative(unittest.TestCase):
def test_layer_in_out(self): def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard(): with fluid.dygraph.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
l = MyLayer("my_layer") l = MyLayer("my_layer")
x = l(var_inp)[0] x = l(var_inp)[0]
self.assertIsNotNone(x) self.assertIsNotNone(x)
...@@ -310,8 +310,8 @@ class TestImperative(unittest.TestCase): ...@@ -310,8 +310,8 @@ class TestImperative(unittest.TestCase):
def test_mlp(self): def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard(): with fluid.dygraph.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp") mlp = MLP("mlp")
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out._numpy() dy_out = out._numpy()
...@@ -353,8 +353,8 @@ class TestImperative(unittest.TestCase): ...@@ -353,8 +353,8 @@ class TestImperative(unittest.TestCase):
[10.0, 11.0, 12.0]]) [10.0, 11.0, 12.0]])
np_inp = np_inp.reshape((1, 4, 3)) np_inp = np_inp.reshape((1, 4, 3))
np_inp = np_inp.astype(np.float32) np_inp = np_inp.astype(np.float32)
with fluid.imperative.guard(): with fluid.dygraph.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN("simple_rnn") simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn.forward(var_inp) outs, pre_hiddens = simple_rnn.forward(var_inp)
......
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.imperative.base import to_variable from paddle.fluid.dygraph.base import to_variable
class SimpleImgConvPool(fluid.imperative.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.imperative.Layer): ...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.imperative.Layer):
return x return x
class MNIST(fluid.imperative.Layer): class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
...@@ -98,12 +98,12 @@ class MNIST(fluid.imperative.Layer): ...@@ -98,12 +98,12 @@ class MNIST(fluid.imperative.Layer):
return x return x
class TestImperativeCheckpoint(unittest.TestCase): class TestDygraphCheckpoint(unittest.TestCase):
def save_load_persistables(self): def save_load_persistables(self):
seed = 90 seed = 90
epoch_num = 1 epoch_num = 1
with fluid.imperative.guard(): with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
...@@ -135,14 +135,14 @@ class TestImperativeCheckpoint(unittest.TestCase): ...@@ -135,14 +135,14 @@ class TestImperativeCheckpoint(unittest.TestCase):
avg_loss._backward() avg_loss._backward()
sgd.minimize(avg_loss) sgd.minimize(avg_loss)
fluid.imperative.save_persistables(mnist, "save_dir") fluid.dygraph.save_persistables(mnist, "save_dir")
mnist.clear_gradients() mnist.clear_gradients()
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param._numpy()
mnist.load_dict( mnist.load_dict(
fluid.imperative.load_persistables(mnist, "save_dir")) fluid.dygraph.load_persistables(mnist, "save_dir"))
restore = mnist.parameters() restore = mnist.parameters()
......
...@@ -22,7 +22,7 @@ import paddle ...@@ -22,7 +22,7 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.imperative.base import to_variable from paddle.fluid.dygraph.base import to_variable
# Can use Amusic dataset as the DeepCF describes. # Can use Amusic dataset as the DeepCF describes.
DATA_PATH = os.environ.get('DATA_PATH', '') DATA_PATH = os.environ.get('DATA_PATH', '')
...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5)) ...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5))
NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1))
class DMF(fluid.imperative.Layer): class DMF(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(DMF, self).__init__(name_scope) super(DMF, self).__init__(name_scope)
self._user_latent = fluid.imperative.FC(self.full_name(), 256) self._user_latent = fluid.dygraph.FC(self.full_name(), 256)
self._item_latent = fluid.imperative.FC(self.full_name(), 256) self._item_latent = fluid.dygraph.FC(self.full_name(), 256)
self._user_layers = [] self._user_layers = []
self._item_layers = [] self._item_layers = []
...@@ -45,12 +45,12 @@ class DMF(fluid.imperative.Layer): ...@@ -45,12 +45,12 @@ class DMF(fluid.imperative.Layer):
self._user_layers.append( self._user_layers.append(
self.add_sublayer( self.add_sublayer(
'user_layer_%d' % i, 'user_layer_%d' % i,
fluid.imperative.FC( fluid.dygraph.FC(
self.full_name(), self._hid_sizes[i], act='relu'))) self.full_name(), self._hid_sizes[i], act='relu')))
self._item_layers.append( self._item_layers.append(
self.add_sublayer( self.add_sublayer(
'item_layer_%d' % i, 'item_layer_%d' % i,
fluid.imperative.FC( fluid.dygraph.FC(
self.full_name(), self._hid_sizes[i], act='relu'))) self.full_name(), self._hid_sizes[i], act='relu')))
def forward(self, users, items): def forward(self, users, items):
...@@ -63,18 +63,18 @@ class DMF(fluid.imperative.Layer): ...@@ -63,18 +63,18 @@ class DMF(fluid.imperative.Layer):
return fluid.layers.elementwise_mul(users, items) return fluid.layers.elementwise_mul(users, items)
class MLP(fluid.imperative.Layer): class MLP(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._user_latent = fluid.imperative.FC(self.full_name(), 256) self._user_latent = fluid.dygraph.FC(self.full_name(), 256)
self._item_latent = fluid.imperative.FC(self.full_name(), 256) self._item_latent = fluid.dygraph.FC(self.full_name(), 256)
self._match_layers = [] self._match_layers = []
self._hid_sizes = [128, 64] self._hid_sizes = [128, 64]
for i in range(len(self._hid_sizes)): for i in range(len(self._hid_sizes)):
self._match_layers.append( self._match_layers.append(
self.add_sublayer( self.add_sublayer(
'match_layer_%d' % i, 'match_layer_%d' % i,
fluid.imperative.FC( fluid.dygraph.FC(
self.full_name(), self._hid_sizes[i], act='relu'))) self.full_name(), self._hid_sizes[i], act='relu')))
self._mat self._mat
...@@ -88,7 +88,7 @@ class MLP(fluid.imperative.Layer): ...@@ -88,7 +88,7 @@ class MLP(fluid.imperative.Layer):
return match_vec return match_vec
class DeepCF(fluid.imperative.Layer): class DeepCF(fluid.dygraph.Layer):
def __init__(self, name_scope, num_users, num_items, matrix): def __init__(self, name_scope, num_users, num_items, matrix):
super(DeepCF, self).__init__(name_scope) super(DeepCF, self).__init__(name_scope)
self._num_users = num_users self._num_users = num_users
...@@ -103,7 +103,7 @@ class DeepCF(fluid.imperative.Layer): ...@@ -103,7 +103,7 @@ class DeepCF(fluid.imperative.Layer):
self._mlp = MLP(self.full_name()) self._mlp = MLP(self.full_name())
self._dmf = DMF(self.full_name()) self._dmf = DMF(self.full_name())
self._match_fc = fluid.imperative.FC(self.full_name(), 1, act='sigmoid') self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid')
def forward(self, users, items): def forward(self, users, items):
# users_emb = self._user_emb(users) # users_emb = self._user_emb(users)
...@@ -191,7 +191,7 @@ def load_data(DATA_PATH): ...@@ -191,7 +191,7 @@ def load_data(DATA_PATH):
np.expand_dims(labels_np, -1), num_users, num_items, matrix np.expand_dims(labels_np, -1), num_users, num_items, matrix
class TestImperativeDeepCF(unittest.TestCase): class TestDygraphDeepCF(unittest.TestCase):
def test_deefcf(self): def test_deefcf(self):
seed = 90 seed = 90
if DATA_PATH: if DATA_PATH:
...@@ -237,7 +237,7 @@ class TestImperativeDeepCF(unittest.TestCase): ...@@ -237,7 +237,7 @@ class TestImperativeDeepCF(unittest.TestCase):
fetch_list=[loss])[0] fetch_list=[loss])[0]
sys.stderr.write('static loss %s\n' % static_loss) sys.stderr.write('static loss %s\n' % static_loss)
with fluid.imperative.guard(): with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
......
...@@ -22,12 +22,12 @@ import paddle ...@@ -22,12 +22,12 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.imperative.base import to_variable from paddle.fluid.dygraph.base import to_variable
class Discriminator(fluid.imperative.Layer): class Discriminator(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Discriminator, self).__init__(name_scope) super(Discriminator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=32, act='elu') self._fc1 = FC(self.full_name(), size=32, act='elu')
...@@ -38,7 +38,7 @@ class Discriminator(fluid.imperative.Layer): ...@@ -38,7 +38,7 @@ class Discriminator(fluid.imperative.Layer):
return self._fc2(x) return self._fc2(x)
class Generator(fluid.imperative.Layer): class Generator(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Generator, self).__init__(name_scope) super(Generator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=64, act='elu') self._fc1 = FC(self.full_name(), size=64, act='elu')
...@@ -51,7 +51,7 @@ class Generator(fluid.imperative.Layer): ...@@ -51,7 +51,7 @@ class Generator(fluid.imperative.Layer):
return self._fc3(x) return self._fc3(x)
class TestImperativeGAN(unittest.TestCase): class TestDygraphGAN(unittest.TestCase):
def test_gan_float32(self): def test_gan_float32(self):
seed = 90 seed = 90
...@@ -130,7 +130,7 @@ class TestImperativeGAN(unittest.TestCase): ...@@ -130,7 +130,7 @@ class TestImperativeGAN(unittest.TestCase):
scope.find_var(param.name).get_tensor()) scope.find_var(param.name).get_tensor())
dy_params = dict() dy_params = dict()
with fluid.imperative.guard(): with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
......
...@@ -22,16 +22,16 @@ import paddle ...@@ -22,16 +22,16 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.imperative.base import to_variable from paddle.fluid.dygraph.base import to_variable
def gen_data(): def gen_data():
pass pass
class GraphConv(fluid.imperative.Layer): class GraphConv(fluid.dygraph.Layer):
def __init__(self, name_scope, in_features, out_features): def __init__(self, name_scope, in_features, out_features):
super(GraphConv, self).__init__(name_scope) super(GraphConv, self).__init__(name_scope)
...@@ -50,7 +50,7 @@ class GraphConv(fluid.imperative.Layer): ...@@ -50,7 +50,7 @@ class GraphConv(fluid.imperative.Layer):
return fluid.layers.matmul(adj, support) + self.bias return fluid.layers.matmul(adj, support) + self.bias
class GCN(fluid.imperative.Layer): class GCN(fluid.dygraph.Layer):
def __init__(self, name_scope, num_hidden): def __init__(self, name_scope, num_hidden):
super(GCN, self).__init__(name_scope) super(GCN, self).__init__(name_scope)
self.gc = GraphConv(self.full_name(), num_hidden, 32) self.gc = GraphConv(self.full_name(), num_hidden, 32)
...@@ -61,7 +61,7 @@ class GCN(fluid.imperative.Layer): ...@@ -61,7 +61,7 @@ class GCN(fluid.imperative.Layer):
return self.gc2(x, adj) return self.gc2(x, adj)
class TestImperativeGNN(unittest.TestCase): class TestDygraphGNN(unittest.TestCase):
def test_gnn_float32(self): def test_gnn_float32(self):
seed = 90 seed = 90
...@@ -115,7 +115,7 @@ class TestImperativeGNN(unittest.TestCase): ...@@ -115,7 +115,7 @@ class TestImperativeGNN(unittest.TestCase):
static_weight = np.array( static_weight = np.array(
scope.find_var(model.gc.weight.name).get_tensor()) scope.find_var(model.gc.weight.name).get_tensor())
with fluid.imperative.guard(): with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
import six
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(),
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = self._fc(x)
return x
class TestImperativeMnist(unittest.TestCase):
def test_mnist_float32(self):
seed = 90
epoch_num = 1
with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
dy_param_init_value = {}
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy()
if epoch == 0 and batch_id == 0:
for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
sgd.minimize(avg_loss)
mnist.clear_gradients()
dy_param_value = {}
for param in mnist.parameters():
dy_param_value[param.name] = param._numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
img = fluid.layers.data(
name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
sgd.minimize(avg_loss)
# initialize params and fetch them
static_param_init_value = {}
static_param_name_list = []
for param in mnist.parameters():
static_param_name_list.append(param.name)
out = exe.run(fluid.default_startup_program(),
fetch_list=static_param_name_list)
for i in range(len(static_param_name_list)):
static_param_init_value[static_param_name_list[i]] = out[i]
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
static_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape([128, 1])
fetch_list = [avg_loss.name]
fetch_list.extend(static_param_name_list)
out = exe.run(
fluid.default_main_program(),
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
static_param_value = {}
static_out = out[0]
for i in range(1, len(out)):
static_param_value[static_param_name_list[i - 1]] = out[
i]
self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
self.assertTrue(np.allclose(static_out, dy_out))
for key, value in six.iteritems(static_param_value):
self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
if __name__ == '__main__':
unittest.main()
...@@ -22,131 +22,71 @@ import six ...@@ -22,131 +22,71 @@ import six
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer, Adam
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import FC
from paddle.fluid.imperative.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.imperative.Layer): class MLP(fluid.dygraph.Layer):
def __init__(self, def __init__(self, name_scope, param_attr=None, bias_attr=None):
name_scope, super(MLP, self).__init__(name_scope)
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs): self._fc1 = FC(self.full_name(), 10)
x = self._conv2d(inputs) self._fc2 = FC(self.full_name(), 10)
x = self._pool2d(x)
return x
class MNIST(fluid.imperative.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool( def forward(self, inputs):
self.full_name(), 1, 20, 5, 2, 2, act="relu") y = self._fc1(inputs)
+        y = self._fc2(y)
-        self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            self.full_name(), 20, 50, 5, 2, 2, act="relu")
-
-        pool_2_shape = 50 * 4 * 4
-        SIZE = 10
-        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = FC(self.full_name(),
-                      10,
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.NormalInitializer(
-                              loc=0.0, scale=scale)),
-                      act="softmax")
-
-    def forward(self, inputs):
-        x = self._simple_img_conv_pool_1(inputs)
-        x = self._simple_img_conv_pool_2(x)
-        x = self._fc(x)
-        return x
+        return y


-class TestImperativeMnist(unittest.TestCase):
-    def test_mnist_float32(self):
+class TestImperativeOptimizerBase(unittest.TestCase):
+    def setUp(self):
+        self.batch_num = 20
+
+    def get_optimizer(self):
+        raise NotImplementedError()
+
+    def _check_mlp(self):
        seed = 90
-        epoch_num = 1
-        with fluid.imperative.guard():
+        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

-            mnist = MNIST("mnist")
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            mlp = MLP('mlp')
+            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            dy_param_init_value = {}
-            for epoch in range(epoch_num):
-                for batch_id, data in enumerate(train_reader()):
-                    dy_x_data = np.array(
-                        [x[0].reshape(1, 28, 28)
-                         for x in data]).astype('float32')
-                    y_data = np.array(
-                        [x[1] for x in data]).astype('int64').reshape(128, 1)
-
-                    img = to_variable(dy_x_data)
-                    label = to_variable(y_data)
-                    label._stop_gradient = True
-
-                    cost = mnist(img)
-                    loss = fluid.layers.cross_entropy(cost, label)
-                    avg_loss = fluid.layers.mean(loss)
-                    dy_out = avg_loss._numpy()
-
-                    if epoch == 0 and batch_id == 0:
-                        for param in mnist.parameters():
-                            dy_param_init_value[param.name] = param._numpy()
-
-                    avg_loss._backward()
-                    sgd.minimize(avg_loss)
-                    mnist.clear_gradients()
-
-                    dy_param_value = {}
-                    for param in mnist.parameters():
-                        dy_param_value[param.name] = param._numpy()
+            for batch_id, data in enumerate(train_reader()):
+                if batch_id >= self.batch_num:
+                    break
+
+                dy_x_data = np.array(
+                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    128, 1)
+
+                img = to_variable(dy_x_data)
+                label = to_variable(y_data)
+                label._stop_gradient = True
+
+                cost = mlp(img)
+                avg_loss = fluid.layers.reduce_mean(cost)
+                dy_out = avg_loss._numpy()
+
+                if batch_id == 0:
+                    for param in mlp.parameters():
+                        dy_param_init_value[param.name] = param._numpy()
+
+                avg_loss._backward()
+                optimizer.minimize(avg_loss)
+                mlp.clear_gradients()
+                dy_param_value = {}
+                for param in mlp.parameters():
+                    dy_param_value[param.name] = param._numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
...@@ -155,23 +95,22 @@ class TestImperativeMnist(unittest.TestCase):
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

-            mnist = MNIST("mnist")
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            mlp = MLP('mlp')
+            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-            cost = mnist(img)
-            loss = fluid.layers.cross_entropy(cost, label)
-            avg_loss = fluid.layers.mean(loss)
-            sgd.minimize(avg_loss)
+            cost = mlp(img)
+            avg_loss = fluid.layers.reduce_mean(cost)
+            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
-            for param in mnist.parameters():
+            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
...@@ -180,29 +119,26 @@ class TestImperativeMnist(unittest.TestCase):
            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

-            for epoch in range(epoch_num):
-                for batch_id, data in enumerate(train_reader()):
-                    static_x_data = np.array(
-                        [x[0].reshape(1, 28, 28)
-                         for x in data]).astype('float32')
-                    y_data = np.array(
-                        [x[1] for x in data]).astype('int64').reshape([128, 1])
-
-                    fetch_list = [avg_loss.name]
-                    fetch_list.extend(static_param_name_list)
-                    out = exe.run(
-                        fluid.default_main_program(),
-                        feed={"pixel": static_x_data,
-                              "label": y_data},
-                        fetch_list=fetch_list)
-
-                    static_param_value = {}
-                    static_out = out[0]
-                    for i in range(1, len(out)):
-                        static_param_value[static_param_name_list[i - 1]] = out[
-                            i]
-
-            self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
+            for batch_id, data in enumerate(train_reader()):
+                if batch_id >= self.batch_num:
+                    break
+
+                static_x_data = np.array(
+                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    [128, 1])
+
+                fetch_list = [avg_loss.name]
+                fetch_list.extend(static_param_name_list)
+                out = exe.run(fluid.default_main_program(),
+                              feed={"pixel": static_x_data,
+                                    "label": y_data},
+                              fetch_list=fetch_list)
+
+                static_param_value = {}
+                static_out = out[0]
+                for i in range(1, len(out)):
+                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
...@@ -210,7 +146,92 @@ class TestImperativeMnist(unittest.TestCase):
        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
+            self.assertTrue(np.allclose(value, dy_param_value[key]))
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
bd = [3, 6, 9]
optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
learning_rate=0.1,
decay_steps=10000,
decay_rate=0.5,
staircase=True))
return optimizer
def test_adam(self):
self._check_mlp()
class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
learning_rate=0.1, decay_steps=5, cycle=self.cycle))
return optimizer
def test_sgd_cycle(self):
self.cycle = True
self._check_mlp()
def test_sgd(self):
self.cycle = False
self._check_mlp()
class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
learning_rate=0.1, step_each_epoch=10000, epochs=120))
return optimizer
def test_sgd(self):
self._check_mlp()
class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
d_model=512, warmup_steps=8000))
return optimizer
def test_sgd(self):
self._check_mlp()
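# For reference, the schedules exercised above roughly follow these closed
# forms (a sketch based on the fluid.layers decay documentation, not on this
# patch; staircase=True floors step / decay_steps):
#   natural_exp_decay(step)  ~ lr * exp(-decay_rate * step / decay_steps)
#   exponential_decay(step)  ~ lr * decay_rate ** (step / decay_steps)
#   inverse_time_decay(step) ~ lr / (1 + decay_rate * step / decay_steps)
#   noam_decay(step)         ~ d_model**-0.5 * min(step**-0.5,
#                                                  step * warmup_steps**-1.5)
# _check_mlp only asserts that the dygraph run matches the static-graph run,
# so these exact values are not checked by the tests.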
if __name__ == '__main__':
......
...@@ -16,17 +16,17 @@ from __future__ import print_function
import unittest
import paddle.fluid as fluid
-from paddle.fluid.imperative.nn import Embedding
+from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.imperative.base import to_variable
+from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
import numpy as np
import six
from paddle.fluid.backward import append_backward


-class SimpleLSTMRNN(fluid.imperative.Layer):
+class SimpleLSTMRNN(fluid.dygraph.Layer):
    def __init__(self,
                 name_scope,
                 hidden_size,
...@@ -131,7 +131,7 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
        return real_res, last_hidden, last_cell


-class PtbModel(fluid.imperative.Layer):
+class PtbModel(fluid.dygraph.Layer):
    def __init__(self,
                 name_scope,
                 hidden_size,
...@@ -214,7 +214,7 @@ class PtbModel(fluid.imperative.Layer):
        return loss, last_hidden, last_cell


-class TestImperativePtbRnn(unittest.TestCase):
+class TestDygraphPtbRnn(unittest.TestCase):
    def test_ptb_rnn_cpu_float32(self):
        seed = 90
        hidden_size = 10
...@@ -224,7 +224,7 @@ class TestImperativePtbRnn(unittest.TestCase):
        init_scale = 0.1
        batch_size = 4

-        with fluid.imperative.guard():
+        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
......
...@@ -21,8 +21,8 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
-from paddle.fluid.imperative.base import to_variable
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
+from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope

batch_size = 8
...@@ -57,7 +57,7 @@ def optimizer_setting(params):
        lr = []
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
        optimizer = fluid.optimizer.SGD(learning_rate=0.01)
-        # TODO(minqiyang): Add learning rate scheduler support to imperative mode
+        # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
        #  optimizer = fluid.optimizer.Momentum(
        #  learning_rate=params["lr"],
        #  learning_rate=fluid.layers.piecewise_decay(
...@@ -68,7 +68,7 @@ def optimizer_setting(params):
    return optimizer


-class ConvBNLayer(fluid.imperative.Layer):
+class ConvBNLayer(fluid.dygraph.Layer):
    def __init__(self,
                 name_scope,
                 num_channels,
...@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.imperative.Layer):
        return y


-class BottleneckBlock(fluid.imperative.Layer):
+class BottleneckBlock(fluid.dygraph.Layer):
    def __init__(self,
                 name_scope,
                 num_channels,
...@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.imperative.Layer):
        return layer_helper.append_activation(y)


-class ResNet(fluid.imperative.Layer):
+class ResNet(fluid.dygraph.Layer):
    def __init__(self, name_scope, layers=50, class_dim=102):
        super(ResNet, self).__init__(name_scope)
...@@ -226,13 +226,13 @@ class ResNet(fluid.imperative.Layer):
        return y


-class TestImperativeResnet(unittest.TestCase):
+class TestDygraphResnet(unittest.TestCase):
    def test_resnet_float32(self):
        seed = 90
        batch_size = train_parameters["batch_size"]
        batch_num = 20
-        with fluid.imperative.guard():
+        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
......
...@@ -16,7 +16,7 @@ from __future__ import print_function
import unittest
import paddle.fluid as fluid
-from paddle.fluid.imperative import Embedding, LayerNorm, FC, to_variable, Layer, guard
+from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard
from test_imperative_base import new_program_scope
from paddle.fluid import core
import numpy as np
...@@ -623,7 +623,7 @@ class PrepareEncoderDecoderLayer(Layer):
                initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
                trainable=False))

-        # use in imperative_mode to fit different length batch
+        # use in dygraph_mode to fit different length batch
        # self._pos_emb._w = to_variable(
        #     position_encoding_init(self._src_max_len, self._src_emb_dim))
...@@ -946,7 +946,7 @@ class TransFormer(Layer):
        return sum_cost, avg_cost, predict, token_num


-class TestImperativeTransformer(unittest.TestCase):
+class TestDygraphTransformer(unittest.TestCase):
    def test_transformer_float32(self):
        seed = 90
        with guard():
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import unittest
import numpy as np
from op_test import OpTest
def kldiv_loss(x, target, reduction):
output = target * (np.log(target) - x)
loss = np.where(target >= 0, output, np.zeros_like(x))
if reduction == "batchmean":
return loss.sum() / x.shape[0]
if reduction == "mean":
return loss.mean()
if reduction == "sum":
return loss.sum()
return loss
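# In this reference, 'x' is treated as log-probabilities and 'target' as
# probabilities, so each element contributes target * (log(target) - x);
# entries with negative targets are zeroed out by the np.where mask.
# Rough worked example (illustrative numbers, not taken from the test cases):
# target = 0.5 and x = log(0.25) give 0.5 * (log(0.5) - log(0.25))
# = 0.5 * log(2) ~= 0.3466, and 'batchmean' divides the summed loss by
# x.shape[0].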
class TestKLDivLossOp(OpTest):
def setUp(self):
self.initTestCase()
self.op_type = 'kldiv_loss'
x = np.random.uniform(-10, 10, self.x_shape).astype('float32')
target = np.random.uniform(-10, 10, self.x_shape).astype('float32')
self.attrs = {"reduction": self.reduction}
self.inputs = {
'X': x,
'Target': target,
}
loss = kldiv_loss(x, target, self.reduction)
self.outputs = {'Loss': loss.astype('float32')}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(
['X'], 'Loss', no_grad_set=set(["Target"]), max_relative_error=0.06)
def initTestCase(self):
self.x_shape = (2, 5, 5)
self.reduction = 'batchmean'
class TestKLDivLossOp2(TestKLDivLossOp):
def initTestCase(self):
self.x_shape = (3, 2, 7, 7)
self.reduction = 'none'
class TestKLDivLossOp3(TestKLDivLossOp):
def initTestCase(self):
self.x_shape = (2, 3, 5, 7, 9)
self.reduction = 'mean'
class TestKLDivLossOp4(TestKLDivLossOp):
def initTestCase(self):
self.x_shape = (5, 7)
self.reduction = 'sum'
if __name__ == "__main__":
unittest.main()
...@@ -29,8 +29,8 @@ from paddle.fluid import core
from paddle.fluid.initializer import Constant
import paddle.fluid.layers as layers
from test_imperative_base import new_program_scope
-from paddle.fluid.imperative import nn
-from paddle.fluid.imperative import base
+from paddle.fluid.dygraph import nn
+from paddle.fluid.dygraph import base


class LayerTest(unittest.TestCase):
...@@ -68,7 +68,7 @@ class LayerTest(unittest.TestCase):
    @contextlib.contextmanager
    def dynamic_graph(self, force_to_use_cpu=False):
-        with fluid.imperative.guard(
+        with fluid.dygraph.guard(
                self._get_place(force_to_use_cpu=force_to_use_cpu)):
            fluid.default_startup_program().random_seed = self.seed
            fluid.default_main_program().random_seed = self.seed
...@@ -845,7 +845,7 @@ class TestBook(unittest.TestCase):
        with program_guard(program):
            data = layers.data(name='data', shape=[10], dtype='float32')
            hid = layers.fc(input=data, size=20)
-            self.assertIsNotNone(layers.softmax(hid))
+            self.assertIsNotNone(layers.softmax(hid, axis=1))
            print(str(program))

    def test_space_to_depth(self):
...@@ -1591,6 +1591,23 @@ class TestBook(unittest.TestCase):
            out = layers.spectral_norm(weight, dim=1, power_iters=1)
            self.assertIsNotNone(out)
def test_kldiv_loss(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[32, 128, 128], dtype="float32")
target = layers.data(
name='target', shape=[32, 128, 128], dtype="float32")
loss = layers.kldiv_loss(x=x, target=target, reduction='batchmean')
self.assertIsNotNone(loss)
print(str(program))
def test_temporal_shift(self):
program = Program()
with program_guard(program):
x = layers.data(name="X", shape=[16, 4, 4], dtype="float32")
out = layers.temporal_shift(x, seg_num=4, shift_ratio=0.2)
self.assertIsNotNone(out)
            print(str(program))

    def test_shuffle_channel(self):
......
...@@ -120,9 +120,9 @@ class TestLearningRateDecay(unittest.TestCase):
            self.assertAlmostEqual(
                python_decayed_lr,
                lr_val[0],
-                msg='Failed fn is {0}, Python result is {1}, Fluid result is {2}'.
-                format(python_decay_fn.__name__,
-                       str(python_decayed_lr), str(lr_val[0])))
+                msg='Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}'.
+                format(python_decay_fn.__name__,
+                       str(step), str(python_decayed_lr), str(lr_val[0])))

    def test_decay(self):
        common_kwargs_true = {
...@@ -164,12 +164,53 @@ class TestLearningRateDecay(unittest.TestCase):
        ]

        for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
-            print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs))
+            print("class=" + self.__class__.__name__ + "decay_fn=" +
+                  py_decay_fn.__name__ + " kwargs=" + str(kwargs))
            main_program = framework.Program()
            startup_program = framework.Program()
            with framework.program_guard(main_program, startup_program):
                self.check_decay(py_decay_fn, fluid_decay_fn, kwargs)
def linear_lr_warmup(global_step, warmup_steps, start_lr, end_lr):
linear_step = end_lr - start_lr
decayed_lr = start_lr + linear_step * (global_step / warmup_steps)
return decayed_lr
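# This reference interpolates linearly from start_lr to end_lr over the warmup
# window: lr(step) = start_lr + (end_lr - start_lr) * step / warmup_steps.
# Illustrative numbers (not taken from the test): with start_lr = 1/3,
# end_lr = 0.1 and warmup_steps = 10, step 5 gives roughly
# 1/3 + (0.1 - 1/3) * 0.5 ~= 0.2167; once step reaches warmup_steps, the
# wrapped decay function takes over, as checked in the test below.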
class TestLinearWamrupLearningRateDecay(TestLearningRateDecay):
def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn,
kwargs):
main_prog = fluid.Program()
startup_prog = fluid.Program()
warmup_steps = 10
start_lr = 1. / 3.
end_lr = 0.1
with fluid.program_guard(main_prog, startup_prog):
decayed_lr = layers.linear_lr_warmup(
fluid_decay_fn(**kwargs), warmup_steps, start_lr, end_lr)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
for step in range(20):
lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr])
if step < warmup_steps:
python_decayed_lr = linear_lr_warmup(
float(step), warmup_steps, start_lr, end_lr)
else:
python_decayed_lr = python_decay_fn(
global_step=float(step), **kwargs)
self.assertAlmostEqual(
python_decayed_lr,
lr_val[0],
msg='Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}'.
format(python_decay_fn.__name__,
str(step), str(python_decayed_lr), str(lr_val[0])))
if __name__ == '__main__':
    unittest.main()
...@@ -177,6 +177,9 @@ class TestMNIST(TestParallelExecutorBase):
        for use_fast_executor in (False, True):
            self.check_batchnorm_fc_convergence(use_cuda, use_fast_executor)
# FIXME(wuyi): should checkout why this fails when merging
# https://github.com/PaddlePaddle/Paddle/pull/16545
@unittest.skip("should fix this later")
    def test_batchnorm_fc_with_new_strategy(self):
        # NOTE: the computation result of nccl_reduce is non-deterministic,
        # related issue: https://github.com/NVIDIA/nccl/issues/157
......
...@@ -31,6 +31,9 @@ class TestSoftmaxOp(OpTest):
    def get_x_shape(self):
        return [10, 10]
def get_axis(self):
return -1
    def setUp(self):
        self.op_type = "softmax"
        self.use_cudnn = False
...@@ -38,15 +41,15 @@ class TestSoftmaxOp(OpTest):
        self.dtype = np.float32
        self.init_kernel_type()
        self.shape = self.get_x_shape()
+        self.axis = self.get_axis()

        x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
-        out = np.apply_along_axis(stable_softmax, 1,
-                                  x.reshape([-1, self.shape[-1]]))
-        out = out.reshape(self.shape)
+        out = np.apply_along_axis(stable_softmax, self.axis, x)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
        self.attrs = {
+            'axis': self.axis,
            'use_cudnn': self.use_cudnn,
            'use_mkldnn': self.use_mkldnn
        }
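        # The reference output now applies stable_softmax along self.axis via
        # np.apply_along_axis; the default axis of -1 reproduces the previous
        # behaviour of normalizing over the last dimension.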
...@@ -76,6 +79,38 @@ class TestSoftmaxOp2(TestSoftmaxOp):
        return [2, 3, 4, 5]
class TestSoftmaxOp3(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
def get_axis(self):
return 0
class TestSoftmaxOp4(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
def get_axis(self):
return 1
class TestSoftmaxOp5(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
def get_axis(self):
return 2
class TestSoftmaxOp6(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
def get_axis(self):
return 3
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp(TestSoftmaxOp):
...@@ -90,6 +125,16 @@ class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp):
        return [2, 3, 4, 5]
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
def get_axis(self):
return 3
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxFP16Op(TestSoftmaxOp):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import unittest
import numpy as np
from op_test import OpTest
from paddle.fluid import core
def temporal_shift(x, seg_num, shift_ratio):
shape = x.shape
reshape_x = x.reshape((-1, seg_num, shape[1], shape[2], shape[3]))
pad_x = np.pad(reshape_x, ((0, 0), (1, 1), (0, 0), (0, 0), (0, 0)),
'constant')
c1 = int(shape[1] * shift_ratio)
c2 = int(shape[1] * 2 * shift_ratio)
slice1 = pad_x[:, :seg_num, :c1, :, :]
slice2 = pad_x[:, 2:seg_num + 2, c1:c2, :, :]
slice3 = pad_x[:, 1:seg_num + 1, c2:, :, :]
concat_x = np.concatenate([slice1, slice2, slice3], axis=2)
return concat_x.reshape(shape)
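# In this reference, the input is reshaped to (N / seg_num, seg_num, C, H, W)
# and zero-padded along the segment axis; the first c1 = int(C * shift_ratio)
# channels of the output are then read from the previous segment, the next
# c2 - c1 channels from the following segment, and the remaining channels from
# the current segment. With the default initTestCase values below
# (x_shape = (6, 4, 4, 4), seg_num = 3, shift_ratio = 0.25) this gives c1 = 1
# and c2 = 2, so channel 0 comes from the previous segment, channel 1 from the
# next segment, and channels 2 and 3 are left in place.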
class TestTemporalShift(OpTest):
def setUp(self):
self.initTestCase()
self.op_type = 'temporal_shift'
x = np.random.random(self.x_shape).astype('float32')
self.attrs = {
"seg_num": self.seg_num,
"shift_ratio": self.shift_ratio,
}
self.inputs = {"X": x, }
output = temporal_shift(x, self.seg_num, self.shift_ratio)
self.outputs = {"Out": output}
def test_check_output(self):
self.check_output()
def test_check_grad_ignore_uv(self):
self.check_grad(['X'], 'Out')
def initTestCase(self):
self.x_shape = (6, 4, 4, 4)
self.seg_num = 3
self.shift_ratio = 0.25
class TestTemporalShift2(TestTemporalShift):
def initTestCase(self):
self.x_shape = (4, 9, 7, 7)
self.seg_num = 2
self.shift_ratio = 0.2
class TestTemporalShift3(TestTemporalShift):
def initTestCase(self):
self.x_shape = (3, 10, 5, 5)
self.seg_num = 1
self.shift_ratio = 0.3
if __name__ == "__main__":
unittest.main()
...@@ -19,7 +19,6 @@ from paddle.fluid.framework import default_main_program, Program, convert_np_dty
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
-from test_imperative_base import new_program_scope


class TestVariable(unittest.TestCase):
...@@ -62,7 +61,7 @@ class TestVariable(unittest.TestCase):
            name='step_scopes', type=core.VarDesc.VarType.STEP_SCOPES)
        self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type)

-    def _test_slice(self):
+    def _test_slice(self, place):
        b = default_main_program().current_block()
        w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)
...@@ -84,7 +83,6 @@ class TestVariable(unittest.TestCase):
        self.assertEqual(0, nw.lod_level)

-        place = fluid.CPUPlace()
        main = fluid.Program()
        with fluid.program_guard(main):
            exe = fluid.Executor(place)
...@@ -101,10 +99,23 @@ class TestVariable(unittest.TestCase):
            var6 = var[1, 1:, 1:]
            var7 = var[1, ..., 1:]
            var8 = var[1, ...]
var_reshape = fluid.layers.reshape(var, [3, -1, 3])
var9 = var_reshape[1, ..., 2]
var10 = var_reshape[:, :, -1]
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.fc(input=x, size=1, act=None)
var11 = y[:, 0]
feeder = fluid.DataFeeder(place=place, feed_list=[x])
data = []
data.append((np.random.randint(10, size=[13]).astype('float32')))
exe.run(fluid.default_startup_program())
            local_out = exe.run(main,
+                               feed=feeder.feed([data]),
                                fetch_list=[
                                    var, var1, var2, var3, var4, var5, var6,
-                                   var7, var8
+                                   var7, var8, var9, var10, var11
                                ])

            self.assertTrue((np.array(local_out[1]) == np.array(tensor_array[
...@@ -123,38 +134,16 @@ class TestVariable(unittest.TestCase):
                1, ..., 1:])).all())
            self.assertTrue((np.array(local_out[8]) == np.array(tensor_array[
                1, ...])).all())
+            self.assertEqual(local_out[9].shape, (1, 3, 1))
+            self.assertEqual(local_out[10].shape, (3, 3, 1))
+            self.assertEqual(local_out[11].shape, (1, 1))

    def test_slice(self):
-        self._test_slice()
-
-
-class TestVariableImperative(unittest.TestCase):
-    def _test_slice(self):
-        b = default_main_program().current_block()
-        w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)
-        for i in range(3):
-            nw = w[i]
-            self.assertEqual([1, 100, 100], nw.shape)
-        nw = w[:]
-        self.assertEqual([784, 100, 100], nw.shape)
-        nw = w[:, :, :]
-        self.assertEqual([784, 100, 100], nw.shape)
-        nw = w[::2, ::2, :]
-        self.assertEqual([392, 50, 100], nw.shape)
-        nw = w[::-2, ::-2, :]
-        self.assertEqual([392, 50, 100], nw.shape)
-        nw = w[0::-2, 0::-2, :]
-        self.assertEqual([1, 1, 100], nw.shape)
-
-    def test_slice(self):
-        with fluid.imperative.guard():
-            self._test_slice()
+        place = fluid.CPUPlace()
+        self._test_slice(place)
+
+        if core.is_compiled_with_cuda():
+            self._test_slice(core.CUDAPlace(0))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -102,7 +102,7 @@ packages=['paddle',
          'paddle.reader',
          'paddle.distributed',
          'paddle.fluid',
-         'paddle.fluid.imperative',
+         'paddle.fluid.dygraph',
          'paddle.fluid.proto',
          'paddle.fluid.proto.profiler',
          'paddle.fluid.distributed',
...@@ -119,6 +119,7 @@ packages=['paddle',
          'paddle.fluid.contrib.slim.quantization',
          'paddle.fluid.contrib.slim.distillation',
          'paddle.fluid.contrib.utils',
+         'paddle.fluid.contrib.extend_optimizer',
          'paddle.fluid.transpiler',
          'paddle.fluid.transpiler.details']
......
...@@ -28,7 +28,7 @@ import hashlib

member_dict = collections.OrderedDict()

-experimental_namespace = {"paddle.fluid.imperative"}
+experimental_namespace = {"paddle.fluid.dygraph"}


def md5(doc):
......