diff --git a/cmake/external/dgc.cmake b/cmake/external/dgc.cmake index 199ca88b47754638d5e93043e078d552261dc088..a58b8c68d7716a901db1907af64c4a344a24cfc6 100644 --- a/cmake/external/dgc.cmake +++ b/cmake/external/dgc.cmake @@ -34,7 +34,7 @@ ExternalProject_Add( BUILD_IN_SOURCE 1 ) -ADD_LIBRARY(dgc SHARED IMPORTED GLOBAL) +ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES}) ADD_DEPENDENCIES(dgc extern_dgc) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e6f5cb7473cdac95afabef8b133131ad71867f7b..54fb8016f5b7141d5904d9d696f2385a0fa67881 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -95,7 +95,7 @@ paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b')) paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8')) paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4')) -paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b')) +paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', '59b1c6bf2f0fa9dc649c85fef3a3b2ea')) paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa')) paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95')) @@ -134,7 +134,7 @@ paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d')) paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996')) paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e')) -paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee')) +paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '2c4d1ae83da6ed35e3b36ba1b3b51d23')) paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e')) paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b')) paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '3f536aafba30d793287b52d231baff1b')) @@ -225,10 +225,12 @@ paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=Non paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6')) paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932')) paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949')) +paddle.fluid.layers.temporal_shift (ArgSpec(args=['x', 'seg_num', 'shift_ratio', 'name'], varargs=None, keywords=None, defaults=(0.25, None)), ('document', 'fe4481fb31363b09cfdd228fc6776ddf')) paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb')) paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d')) paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99')) paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7')) +paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec')) paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607')) paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393')) @@ -276,7 +278,7 @@ paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a')) paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f')) paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77')) -paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823')) +paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', 'dd68bead34dfbaf6b0a163fc1cc3c385')) paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2')) paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -357,6 +359,7 @@ paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], vara paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28')) paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8')) paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b')) +paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565')) paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99')) @@ -406,6 +409,7 @@ paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', ' paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665')) paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a')) paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a')) +paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4')) paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680')) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8')) @@ -428,63 +432,75 @@ paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970')) paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.SGDOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.SGDOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.MomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.MomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.AdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdamOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.AdamOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdamaxOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.AdamaxOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.DecayedAdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.FtrlOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.FtrlOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.RMSPropOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.AdadeltaOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715')) paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.ModelAverage.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.ModelAverage.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe')) paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.LarsMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.LarsMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) paddle.fluid.optimizer.DGCMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'rampup_begin_step', 'rampup_step', 'sparsity', 'use_nesterov', 'local_grad_clip_norm', 'num_trainers', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1, [0.999], False, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DGCMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.DGCMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 236103c100100a0ffc7354b465ec02e85bab23e0..8749debe50c516212fb487e7bc6d404935a169f3 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -195,8 +195,7 @@ cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry proto_desc) -cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper) - +cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS inplace_op_pass op_registry proto_desc op_info memory_optimize_helper pass_builder) cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 28bb0ded374b754b2fa3a9397a39179742210061..2c1f3ae638cf95c3ab49219909fe3b1f22137099 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -25,8 +25,12 @@ if(WITH_DISTRIBUTE) endif() if(WITH_GPU) + set(dgc_deps "") + if(NOT WIN32) + set(dgc_deps dgc) + endif() nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - dynload_cuda variable_visitor dgc) + dynload_cuda variable_visitor ${dgc_deps}) nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory dynload_cuda variable_visitor) if(WITH_DISTRIBUTE) diff --git a/paddle/fluid/framework/details/all_reduce_deps_pass.cc b/paddle/fluid/framework/details/all_reduce_deps_pass.cc index d93c84606d9492920ebcf669650ab74fb5b09af5..878b950858a71ba0e10ab2643667922420d29099 100644 --- a/paddle/fluid/framework/details/all_reduce_deps_pass.cc +++ b/paddle/fluid/framework/details/all_reduce_deps_pass.cc @@ -68,7 +68,7 @@ void AllReduceDepsPass::ApplyImpl(ir::Graph* graph) const { for (auto& o_it : outputs) { for (auto& v : o_it.second) { // values vars[v] = order; - VLOG(1) << "in all_reduce_deps_pass:" << v; + VLOG(10) << "in all_reduce_deps_pass:" << v; } } order++; diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc index afbda33b0662e7831b7ea0d44dc7ae4ff3694b1c..79150f719e379ca4e2b87d2e7db1b2daeee9aa67 100644 --- a/paddle/fluid/framework/details/inplace_op_pass.cc +++ b/paddle/fluid/framework/details/inplace_op_pass.cc @@ -156,7 +156,6 @@ void InplacePass::ApplyImpl(ir::Graph* graph) const { continue; TryInplaceOpInputOutput(op, graph); } - // graph->ResolveHazard(var_nodes_); } void InplacePass::InplaceModifyDesc(const std::string& var, @@ -168,7 +167,7 @@ void InplacePass::InplaceModifyDesc(const std::string& var, auto* op_desc = op->Op(); op_desc->RenameInput(var, cache_var); op_desc->RenameOutput(var, cache_var); - if (op_desc->Block()->HasVar(var)) op_desc->Block()->RemoveVar(var); + op_desc->Flush(); } } @@ -265,8 +264,6 @@ void InplacePass::WithdrawModify(const NodeSwapQueue& nodes, void InplacePass::TryInplaceOpInputOutput(ir::Node* op, ir::Graph* graph) const { VLOG(4) << "Try to inplace op " << op->Name(); - // PADDLE_ENFORCE(op->Op() != nullptr && op->Op()->Block() != nullptr, - // "op_desc is nullptr"); // some pre-requirments need to meet if the op want to inplaced. PADDLE_ENFORCE(op->Op() != nullptr, "op_desc is nullptr"); @@ -446,19 +443,20 @@ bool GraphView::CheckDeps(ir::Node* var, ir::Node* current_op) const { // check if op2 depends on op1's output bool GraphView::CheckOpDeps(ir::Node* op1, ir::Node* op2) const { - auto print_op = [&](ir::Node* op, const char* name) { - std::ostringstream os; - os << " " << name << " : " << op->Name() << " "; - os << "Input args : "; - for (auto& arg : op->inputs) os << arg->Name() << " "; - os << "Output args : "; - for (auto& arg : op->outputs) os << arg->Name() << " "; - os << "Level : " << op_level_.at(op); - VLOG(4) << os.str(); - }; - print_op(op1, "OP1"); - print_op(op2, "OP2"); - + if (VLOG_IS_ON(4)) { + auto print_op = [&](ir::Node* op, const char* name) { + std::ostringstream os; + os << " " << name << " : " << op->Name() << " "; + os << "Input args : "; + for (auto& arg : op->inputs) os << arg->Name() << " "; + os << "Output args : "; + for (auto& arg : op->outputs) os << arg->Name() << " "; + os << "Level : " << op_level_.at(op); + VLOG(4) << os.str(); + }; + print_op(op1, "OP1"); + print_op(op2, "OP2"); + } if (op1 == op2) return true; if (op_level_.at(op1) >= op_level_.at(op2)) return false; diff --git a/paddle/fluid/framework/details/memory_optimize_helper_test.cc b/paddle/fluid/framework/details/memory_optimize_helper_test.cc index 453943af0f123a08b870f11dacb78a5fbd954a56..3fb02f69b1bb65a74a2e5f69e9de7994b4d012db 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper_test.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper_test.cc @@ -142,16 +142,15 @@ TEST(OrderedSet, FindBestFitNode) { for (auto& node : nodes) { pool.Insert(node.get()); } - // FIXME(liuwei1031) this API has changed, - // disable these tests temporarily - // FindNextBestFitNode - // auto* n = nodes[0].get(); - // auto* cache = pool.FindBestFitNode(n); - // PADDLE_ENFORCE(cache->Name() == "a"); - // cache = pool.FindNextBestFitNode(n, cache); - // PADDLE_ENFORCE(cache->Name() == "c"); - // cache = pool.FindNextBestFitNode(n, cache); - // PADDLE_ENFORCE(cache->Name() == "b"); + + auto* n = nodes[0].get(); + auto* cache = pool.FindBestFitNode(n); + ASSERT_TRUE(cache->Name() == "a" || cache->Name() == "c"); + auto* cache_b = pool.FindNextBestFitNode(n, cache); + ASSERT_TRUE(cache_b->Name() != cache->Name()); + ASSERT_TRUE(cache_b->Name() == "a" || cache_b->Name() == "c"); + cache = pool.FindNextBestFitNode(n, cache_b); + ASSERT_TRUE(cache == nullptr); } } // namespace details diff --git a/paddle/fluid/framework/inplace_op_inference_test.cc b/paddle/fluid/framework/inplace_op_inference_test.cc index c93e562955fb36ddc4363fac862f3942758af35d..a9b3b889229ee46bf66063c8381bdd02c7229cbd 100644 --- a/paddle/fluid/framework/inplace_op_inference_test.cc +++ b/paddle/fluid/framework/inplace_op_inference_test.cc @@ -12,9 +12,14 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include #include +#include #include +#include #include "gtest/gtest.h" +#include "paddle/fluid/framework/details/inplace_op_pass.h" +#include "paddle/fluid/framework/ir/pass_builder.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -165,118 +170,147 @@ REGISTER_OPERATOR(multi_out_grad, f::NOP, f::MultiOutGradInplaceInToOut, namespace paddle { namespace framework { -// TEST(InferInplace, SingleOpInplaceInToOut) { -// ProgramDesc prog; -// auto* op = prog.MutableBlock(0)->AppendOp(); -// op->SetType("single_op"); -// op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); -// op->SetOutput("Out", {"test2_out"}); -// -// prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); -// prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_out"); -// prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// EXPECT_EQ(in_to_outs.size(), 1ul); -// auto it = in_to_outs.begin(); -// EXPECT_EQ(it->first, "test2_a"); -// EXPECT_EQ(it->second, "test2_out"); -// } -// -// TEST(InferInplace, SingleGradOpInplaceInToOut) { -// ProgramDesc prog; -// auto* op = prog.MutableBlock(0)->AppendOp(); -// op->SetType("single_op_grad"); -// op->SetInput(GradVarName("Out"), {"test2_out"}); -// op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"}); -// -// prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("test2_out"); -// prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// EXPECT_EQ(in_to_outs.size(), 1ul); -// auto it = in_to_outs.begin(); -// EXPECT_EQ(it->first, "test2_out"); -// EXPECT_EQ(it->second, "test2_a"); -// } -// -// TEST(InferInplace, MultiOutInplaceInToOut) { -// ProgramDesc prog; -// auto* op = prog.MutableBlock(0)->AppendOp(); -// op->SetType("multi_out_op"); -// op->SetInput("X", {"a0", "a1"}); -// op->SetInput("Y", {"b0"}); -// op->SetInput("Z", {"c0", "c1"}); -// op->SetOutput("Out", {"o0"}); -// op->SetOutput("YOut", {"y0"}); -// op->SetOutput("ZOut", {"z0"}); -// -// prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("o0"); -// prog.MutableBlock(0)->Var("y0"); -// prog.MutableBlock(0)->Var("z0"); -// prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// EXPECT_EQ(in_to_outs.size(), 3ul); -// std::unordered_map expects = { -// {"a0", "o0"}, {"b0", "y0"}, {"c0", "z0"}, -// }; -// EXPECT_TRUE(expects == in_to_outs); -// } -// -// TEST(InferInplace, MultiGradInplaceInToOut) { -// ProgramDesc prog; -// auto* op = prog.MutableBlock(0)->AppendOp(); -// op->SetType("multi_out_grad"); -// op->SetInput(GradVarName("Out"), {"o0"}); -// op->SetInput(GradVarName("YOut"), {"y0"}); -// op->SetInput(GradVarName("ZOut"), {"z0"}); -// op->SetOutput(GradVarName("X"), {"a0", "a1"}); -// op->SetOutput(GradVarName("Y"), {"b0"}); -// op->SetOutput(GradVarName("Z"), {"c0", "c1"}); -// -// prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("o0"); -// prog.MutableBlock(0)->Var("y0"); -// prog.MutableBlock(0)->Var("z0"); -// prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// -// EXPECT_EQ(in_to_outs.size(), 3ul); -// std::unordered_map expects = { -// {"o0", "a0"}, {"y0", "b0"}, {"z0", "c0"}, -// }; -// EXPECT_TRUE(expects == in_to_outs); -// } +void FakeSuccData(ProgramDesc* prog) { // NOLINT + prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); + prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_out"); + prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 32, 128, 128}); +} + +void FakeNoInplaceData(ProgramDesc* prog) { // NOLINT + prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); + prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_out"); + prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 31, 128, 128}); +} + +ir::Node* GetNodeFromGraph(ir::Graph* g, std::string name) { + ir::Node* op_node = nullptr; + for (auto& item : g->Nodes()) { + if (item->Name() == name) { + op_node = item; + break; + } + } + return op_node; +} + +std::unique_ptr test_SingleOpInplaceInToOut( + std::unique_ptr g) { + std::unique_ptr pass(new details::InplacePass()); + ir::Node* op_node = GetNodeFromGraph(g.get(), "single_op"); + EXPECT_NE(op_node, nullptr); + pass->Apply(g.get()); + return g; +} + +TEST(InferInplace, SingleOpInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("single_op"); + op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); + op->SetOutput("Out", {"test2_out"}); + + FakeSuccData(&prog); + std::unique_ptr g(new ir::Graph(prog)); + g = test_SingleOpInplaceInToOut(std::move(g)); + auto op_node = GetNodeFromGraph(g.get(), "single_op"); + + EXPECT_EQ(op_node->outputs[0]->Name(), "test2_a"); +} + +TEST(InferInplace, SingleOpInplaceInToOutNoInplace) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("single_op"); + op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); + op->SetOutput("Out", {"test2_out"}); + + FakeNoInplaceData(&prog); + std::unique_ptr g(new ir::Graph(prog)); + g = test_SingleOpInplaceInToOut(std::move(g)); + auto op_node = GetNodeFromGraph(g.get(), "single_op"); + + EXPECT_EQ(op_node->outputs[0]->Name(), "test2_out"); +} + +TEST(InferInplace, MultiOutInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("multi_out_op"); + op->SetInput("X", {"a0", "a1"}); + op->SetInput("Y", {"b0"}); + op->SetInput("Z", {"c0", "c1"}); + op->SetOutput("Out", {"o0"}); + op->SetOutput("YOut", {"y0"}); + op->SetOutput("ZOut", {"z0"}); + + prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("o0"); + prog.MutableBlock(0)->Var("y0"); + prog.MutableBlock(0)->Var("z0"); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); + + std::unique_ptr g(new ir::Graph(prog)); + std::unique_ptr pass(new details::InplacePass()); + pass->Apply(g.get()); + auto op_node = GetNodeFromGraph(g.get(), "multi_out_op"); + ASSERT_TRUE(op_node != nullptr); + EXPECT_EQ(op_node->outputs[0]->Name(), "a0"); + EXPECT_EQ(op_node->outputs[1]->Name(), "b0"); + EXPECT_EQ(op_node->outputs[2]->Name(), "c0"); +} + +TEST(InferInplace, MultiGradInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("multi_out_grad"); + op->SetInput(GradVarName("Out"), {"o0"}); + op->SetInput(GradVarName("YOut"), {"y0"}); + op->SetInput(GradVarName("ZOut"), {"z0"}); + op->SetOutput(GradVarName("X"), {"a0", "a1"}); + op->SetOutput(GradVarName("Y"), {"b0"}); + op->SetOutput(GradVarName("Z"), {"c0", "c1"}); + + prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("o0"); + prog.MutableBlock(0)->Var("y0"); + prog.MutableBlock(0)->Var("z0"); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 15, 1024, 1024}); + + std::unique_ptr g(new ir::Graph(prog)); + std::unique_ptr pass(new details::InplacePass()); + pass->Apply(g.get()); + auto op_node = GetNodeFromGraph(g.get(), "multi_out_grad"); + ASSERT_TRUE(op_node != nullptr); + EXPECT_EQ(op_node->outputs[0]->Name(), "o0"); + EXPECT_EQ(op_node->outputs[2]->Name(), "y0"); + EXPECT_EQ(op_node->outputs[3]->Name(), "c0"); + + std::unordered_map expects = { + {"o0", "a0"}, {"y0", "b0"}, {"z0", "c0"}, + }; +} } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 81b8ffa83f612f5b67cd91a7a2c1228519a1fbb7..ba1d7379c56d953a0f37d03deed6c47e46cbf129 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -68,21 +68,12 @@ pass_library(transpose_flatten_concat_fuse_pass inference) pass_library(identity_scale_op_clean_pass base) pass_library(sync_batch_norm_pass base) pass_library(runtime_context_cache_pass base) -pass_library(simplify_anakin_detection_pattern_pass inference) -pass_library(anakin_fillconstant_elementwisemul_fuse inference) +pass_library(quant_conv2d_dequant_fuse_pass inference) +pass_library(fillconstant_elementwisemul_fuse inference) -# There may be many transpose-flatten structures in a model, and the output of -# these structures will be used as inputs to the concat Op. This pattern will -# be detected by our pass. The index here represents the number of structures in the -# pattern. We use index 3 ~ 6, because these quantities of structures are -# common in the models. -foreach (index RANGE 2 6) - file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n") -endforeach() - -foreach (index RANGE 2 6) - file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n") -endforeach() +if(ANAKIN_FOUND) +pass_library(simplify_anakin_priorbox_detection_out_pass inference) +endif() if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base mkldnn) diff --git a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc similarity index 82% rename from paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc rename to paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc index 39077f6420613e115fff828eefc295769c187833..915a2f62bafa2baf98b7407cd87d3e69f20b44d2 100644 --- a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc +++ b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc @@ -15,7 +15,7 @@ #include #include -#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h" +#include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h" namespace paddle { @@ -29,8 +29,8 @@ namespace ir { GET_IR_NODE(elementwise_mul); \ GET_IR_NODE(elementwise_mul_out); -void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { - const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse"; +void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { + const std::string pattern_name = "fillconstant_elementwisemul_fuse"; FusePassBase::Init(pattern_name, graph); GraphPatternDetector gpd; @@ -39,8 +39,8 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { ->assert_is_op_input("elementwise_mul", "X") ->AsInput(); - patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(), - pattern_name); + patterns::FillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(), + pattern_name); pattern(x); auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, @@ -79,5 +79,5 @@ void AnakinFillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse, - paddle::framework::ir::AnakinFillconstantElementwisemulFuse); +REGISTER_PASS(fillconstant_elementwisemul_fuse, + paddle::framework::ir::FillconstantElementwisemulFuse); diff --git a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h similarity index 89% rename from paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h rename to paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h index 14c07c5884ebeda602953704de6db42f16441d6e..ab66fb4a46a8a5b60b3bf95e27ae24c7217a5a3a 100644 --- a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h +++ b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h @@ -21,9 +21,9 @@ namespace paddle { namespace framework { namespace ir { -class AnakinFillconstantElementwisemulFuse : public FusePassBase { +class FillconstantElementwisemulFuse : public FusePassBase { public: - virtual ~AnakinFillconstantElementwisemulFuse() {} + virtual ~FillconstantElementwisemulFuse() {} protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 555fdc7b7a03ebc99fcc77a26341d291dac2c308..8468f9ccc12a017ebe4fe73581e7bbce00dd626d 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1471,7 +1471,8 @@ PDNode *patterns::TransposeFlattenConcat::operator()( } PDNode *patterns::AnakinDetectionPattern::operator()( - std::vector conv_in, int times) { + std::vector conv_in, int times, std::string priorbox_type, + bool is_reshape) { // The times represents the repeat times of the // {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape} const int kNumFields = 7; @@ -1486,37 +1487,38 @@ PDNode *patterns::AnakinDetectionPattern::operator()( const int kMultiClassSecondInputNmsOffset = times + 1; std::vector nodes; + std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2"; for (int i = 0; i < times; i++) { nodes.push_back( pattern->NewNode(GetNodeName("prior_box" + std::to_string(i))) - ->assert_is_op("density_prior_box")); + ->assert_is_op(priorbox_type)); nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i))) - ->assert_is_op_output("density_prior_box", "Boxes") - ->assert_is_op_input("reshape2", "X") + ->assert_is_op_output(priorbox_type, "Boxes") + ->assert_is_op_input(op_after_priorbox, "X") ->AsIntermediate()); nodes.push_back( pattern->NewNode(GetNodeName("reshape1" + std::to_string(i))) - ->assert_is_op("reshape2")); + ->assert_is_op(op_after_priorbox)); nodes.push_back( pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i))) - ->assert_is_op_output("reshape2") + ->assert_is_op_output(op_after_priorbox) ->assert_is_op_nth_input("concat", "X", i) ->AsIntermediate()); nodes.push_back( pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i))) - ->assert_is_op_output("density_prior_box", "Variances") - ->assert_is_op_input("reshape2", "X") + ->assert_is_op_output(priorbox_type, "Variances") + ->assert_is_op_input(op_after_priorbox, "X") ->AsIntermediate()); nodes.push_back( pattern->NewNode(GetNodeName("reshape2" + std::to_string(i))) - ->assert_is_op("reshape2")); + ->assert_is_op(op_after_priorbox)); nodes.push_back( pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i))) - ->assert_is_op_output("reshape2") + ->assert_is_op_output(op_after_priorbox) ->assert_is_op_nth_input("concat", "X", i) ->AsIntermediate()); } @@ -1612,7 +1614,7 @@ PDNode *patterns::AnakinDetectionPattern::operator()( return multiclass_nms_out; } -PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()( +PDNode *patterns::FillConstantElementWiseMulFuse::operator()( PDNode *elementwise_op_input) { auto fill_constant = pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant"); @@ -1635,6 +1637,76 @@ PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()( return elementwise_mul_out; } +void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input, + const std::string &op_type, + const std::string &weight_name, + int times) { + const int kNumFields = 5; + const int kQuantizedWeightOffset = 0; + const int kQuantizedOpOffset = 1; + const int kQuantizedOpOutOffset = 2; + const int kDequantOpOffset = 3; + const int kDequantOpOutOffset = 4; + // the quant op always be one. + auto quant_op_in_scale = + pattern->NewNode(GetNodeName("quant_op_in_scale")) + ->assert_is_op_input("fake_quantize_range_abs_max", "InScale") + ->AsInput(); + auto quant_op = pattern->NewNode(GetNodeName("quant_op")) + ->assert_is_op("fake_quantize_range_abs_max"); + + auto quant_op_out_scale = + pattern->NewNode(GetNodeName("quant_op_out_scale")) + ->assert_is_op_output("fake_quantize_range_abs_max", "OutScale") + ->assert_is_op_input("fake_dequantize_max_abs", "Scale") + ->AsIntermediate(); + + auto quant_op_out = + pattern->NewNode(GetNodeName("quant_op_out")) + ->assert_is_op_output("fake_quantize_range_abs_max", "Out") + ->assert_is_op_input(op_type) + ->AsIntermediate(); + + // there are 'times' quantized and dequant op + std::vector nodes; + for (int i = 0; i < times; i++) { + nodes.push_back( + pattern->NewNode(GetNodeName("quantized_op_weight") + std::to_string(i)) + ->assert_is_op_input(op_type, weight_name) + ->AsInput()); + nodes.push_back( + pattern->NewNode(GetNodeName("quantized_op") + std::to_string(i)) + ->assert_is_op(op_type)); + + nodes.push_back( + pattern->NewNode(GetNodeName("quantized_op_out") + std::to_string(i)) + ->assert_is_op_output(op_type) + ->assert_is_op_input("fake_dequantize_max_abs", "X") + ->AsIntermediate()); + + nodes.push_back( + pattern->NewNode(GetNodeName("dequant_op") + std::to_string(i)) + ->assert_is_op("fake_dequantize_max_abs")); + nodes.push_back( + pattern->NewNode(GetNodeName("dequant_op_out") + std::to_string(i)) + ->assert_is_op_output("fake_dequantize_max_abs", "Out") + ->AsOutput()); + } + + quant_op->LinksFrom({quant_op_input, quant_op_in_scale}); + quant_op_out->LinksFrom({quant_op}); + for (int i = 0; i < times; i++) { + nodes[i * kNumFields + kQuantizedOpOffset]->LinksFrom( + {quant_op_out, nodes[i * kNumFields + kQuantizedWeightOffset]}); + nodes[i * kNumFields + kQuantizedOpOutOffset]->LinksFrom( + {nodes[i * kNumFields + kQuantizedOpOffset]}); + nodes[i * kNumFields + kDequantOpOffset]->LinksFrom( + {nodes[i * kNumFields + kQuantizedOpOutOffset], quant_op_out_scale}); + nodes[i * kNumFields + kDequantOpOutOffset]->LinksFrom( + {nodes[i * kNumFields + kDequantOpOffset]}); + } +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 130ddeac4cd1a38516540d175e17d46f877bd909..a5ac3a0c3733cf610159c6367d04f3323b797c50 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -848,7 +848,8 @@ struct AnakinDetectionPattern : public PatternBase { AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, "anakin_detect_pattern") {} - PDNode* operator()(std::vector conv_inputs, int times); + PDNode* operator()(std::vector conv_inputs, int times, + std::string priorbox_type, bool is_reshape); std::string GetNodeName(const std::string& op_type) { return PDNodeName(name_scope_, repr_, id_, op_type); @@ -859,9 +860,9 @@ struct AnakinDetectionPattern : public PatternBase { } }; -struct AnakinFillConstantElementWiseMulFuse : public PatternBase { - AnakinFillConstantElementWiseMulFuse(PDPattern* pattern, - const std::string& name_scope) +struct FillConstantElementWiseMulFuse : public PatternBase { + FillConstantElementWiseMulFuse(PDPattern* pattern, + const std::string& name_scope) : PatternBase(pattern, name_scope, "anakin_fillconstant_elementwisemul_fuse") {} @@ -874,6 +875,22 @@ struct AnakinFillConstantElementWiseMulFuse : public PatternBase { PATTERN_DECL_NODE(elementwise_mul_out); }; +struct QuantDequantOpFuse : public PatternBase { + QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "quant_dequant_fuse") {} + + void operator()(PDNode* quant_op_input, const std::string& op_name, + const std::string& weight_name, int times = 1); + + std::string GetNodeName(const std::string& op_type) { + return PDNodeName(name_scope_, repr_, id_, op_type); + } + + PDNode* GetPDNode(const std::string& op_type) { + return pattern->RetrieveNode(GetNodeName(op_type)); + } +}; + } // namespace patterns // Link two ir::Nodes from each other. diff --git a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..7cab9c353d35cb6d725d787986e992b6853d42ce --- /dev/null +++ b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc @@ -0,0 +1,173 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "paddle/fluid/framework/ir/graph_viz_pass.h" +#include "paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +void RunQuantDequant(ir::Graph* graph, Scope* scope, int times, + std::string op_type) { + const std::string pattern_name = "quant_dequant_fuse"; + // FusePassBase::Init(pattern_name, graph); + const int kNumFields = 5; + const int kQuantizedWeightOffset = 0; + const int kQuantizedOpOffset = 1; + const int kQuantizedOpOutOffset = 2; + const int kDequantOpOffset = 3; + const int kDequantOpOutOffset = 4; + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern() + ->NewNode("x") + ->assert_is_op_input("fake_quantize_range_abs_max", "X") + ->AsInput(); + + std::string quantized_op_type = ""; + std::string weight_name = ""; + if (op_type == "conv2d") { + quantized_op_type = "conv2d"; + weight_name = "Filter"; + } else if (op_type == "conv2d_fusion") { + quantized_op_type = "conv2d_fusion"; + weight_name = "Filter"; + } else if (op_type == "mul") { + quantized_op_type = "mul"; + weight_name = "Y"; + } else if (op_type == "fc") { + quantized_op_type = "fc"; + weight_name = "W"; + } else { + PADDLE_ENFORCE( + "QuantDequantFuse: We only support conv2d, conv2d_fusion, fc, mul for " + "now."); + } + + patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name); + pattern(x, quantized_op_type, weight_name, times); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + PADDLE_ENFORCE(subgraph.count(x)); + auto* input_node = subgraph.at(x); + Node* quant_op_in_scale = + subgraph.at(pattern.GetPDNode("quant_op_in_scale")); + Node* quant_op = subgraph.at(pattern.GetPDNode("quant_op")); + Node* quant_op_out_scale = + subgraph.at(pattern.GetPDNode("quant_op_out_scale")); + Node* quant_op_out = subgraph.at(pattern.GetPDNode("quant_op_out")); + + std::vector nodes; + for (int i = 0; i < times; i++) { + nodes.push_back(subgraph.at( + pattern.GetPDNode("quantized_op_weight" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("quantized_op" + std::to_string(i)))); + nodes.push_back(subgraph.at( + pattern.GetPDNode("quantized_op_out" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("dequant_op" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("dequant_op_out" + std::to_string(i)))); + } + + int bit_length = boost::get(quant_op->Op()->GetAttr("bit_length")); + int range = ((1 << (bit_length - 1)) - 1); + // Prepare input scale + std::string input_scale_var_name = quant_op->Op()->Input("InScale").front(); + PADDLE_ENFORCE(scope); + const LoDTensor& input_scale_tensor = + scope->FindVar(input_scale_var_name)->Get(); + + PADDLE_ENFORCE(paddle::platform::is_cpu_place(input_scale_tensor.place())); + const float* input_scale_data = input_scale_tensor.data(); + float input_scale = input_scale_data[0]; + std::unordered_set delete_nodes; + + for (int i = 0; i < times; i++) { + // max_range = (range * range) / weight_scale + float max_range = boost::get( + nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range")); + float weight_scale = (range * range) / max_range; + + auto base_op_desc = + *nodes[i * kNumFields + kQuantizedOpOffset]->Op()->Proto(); + std::string new_input = input_node->Name(); + std::string new_output = + nodes[i * kNumFields + kDequantOpOutOffset]->Name(); + + framework::OpDesc new_op_desc(base_op_desc, nullptr); + new_op_desc.SetType(quantized_op_type); + + if (quantized_op_type == "conv2d" || + quantized_op_type == "conv2d_fusion") { + new_op_desc.SetInput("Input", {new_input}); + new_op_desc.SetOutput("Output", {new_output}); + } else if (quantized_op_type == "fc") { + new_op_desc.SetInput("Input", {new_input}); + new_op_desc.SetOutput("Out", {new_output}); + } else if (quantized_op_type == "mul") { + new_op_desc.SetInput("X", {new_input}); + new_op_desc.SetOutput("Out", {new_output}); + } + + new_op_desc.SetAttr("enable_int8", true); + new_op_desc.SetAttr("input_scale", input_scale); + new_op_desc.SetAttr("weight_scale", weight_scale); + new_op_desc.Flush(); + auto* new_op = graph->CreateOpNode(&new_op_desc); + IR_NODE_LINK_TO(input_node, new_op); + IR_NODE_LINK_TO(nodes[i * kNumFields + kQuantizedWeightOffset], new_op); + IR_NODE_LINK_TO(new_op, nodes[i * kNumFields + kDequantOpOutOffset]); + delete_nodes.insert(nodes[i * kNumFields + kQuantizedOpOffset]); + delete_nodes.insert(nodes[i * kNumFields + kQuantizedOpOutOffset]); + delete_nodes.insert(nodes[i * kNumFields + kDequantOpOffset]); + } + + delete_nodes.insert(quant_op_in_scale); + delete_nodes.insert(quant_op); + delete_nodes.insert(quant_op_out); + delete_nodes.insert(quant_op_out_scale); + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph, delete_nodes); + }; + gpd(graph, handler); +} + +void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const { + const std::string pattern_name = "quant_dequant_fuse"; + FusePassBase::Init(pattern_name, graph); + + std::unordered_set quantized_op_types = {"conv2d", "mul"}; + auto* scope = param_scope(); + for (auto& op_type : quantized_op_types) { + for (int i = 1; i <= 6; i++) { + RunQuantDequant(graph, scope, i, op_type); + } + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(quant_conv2d_dequant_fuse_pass, + paddle::framework::ir::QuantDequantFusePass); diff --git a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..a61b34563acc4cbcee778509a097587222579295 --- /dev/null +++ b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class QuantDequantFusePass : public FusePassBase { + public: + virtual ~QuantDequantFusePass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc similarity index 84% rename from paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc rename to paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc index e1ddc444707148b1b781a922429de13a715f3b60..b3606e4d922cc8f59dca90904466a889f83f6094 100644 --- a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc +++ b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc @@ -17,25 +17,24 @@ #include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/node.h" -#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h" +#include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h" namespace paddle { namespace framework { namespace ir { -template -void SimplifyAnakinDetectionPatternPass::ApplyImpl( - ir::Graph *graph) const { +void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density, + bool is_reshape) { const std::string pattern_name = "simplify_anakin_detection_pattern_pass" + std::to_string(times); - FusePassBase::Init(pattern_name, graph); + std::string priorbox_type = is_density ? "density_prior_box" : "prior_box"; GraphPatternDetector gpd; std::vector input_nodes; for (int i = 0; i < times; i++) { input_nodes.push_back(gpd.mutable_pattern() ->NewNode("x" + std::to_string(i)) - ->assert_is_op_input("density_prior_box", "Input") + ->assert_is_op_input(priorbox_type, "Input") ->AsInput()); } input_nodes.push_back(gpd.mutable_pattern() @@ -49,7 +48,7 @@ void SimplifyAnakinDetectionPatternPass::ApplyImpl( ->AsInput()); patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name); - pattern(input_nodes, times); + pattern(input_nodes, times, priorbox_type, is_reshape); auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, Graph *g) { @@ -119,8 +118,7 @@ void SimplifyAnakinDetectionPatternPass::ApplyImpl( boost::get(box_coder_op->Op()->GetAttr("code_type")); bool box_normalized = boost::get(box_coder_op->Op()->GetAttr("box_normalized")); - // auto variance = - // boost::get>(box_coder_op->Op()->GetAttr("variance")); + int background_label = boost::get(multiclass_nms->Op()->GetAttr("background_label")); float score_threshold = @@ -138,7 +136,6 @@ void SimplifyAnakinDetectionPatternPass::ApplyImpl( nodes[i * kNumFields + kPriorBoxLocOffset]->Name()); } - // int axis = boost::get(concat_op1->Op()->GetAttr("axis")); framework::OpDesc concat1_desc; concat1_desc.SetType("concat"); concat1_desc.SetInput("X", concat1_input_names); @@ -213,31 +210,24 @@ void SimplifyAnakinDetectionPatternPass::ApplyImpl( gpd(graph, handler); } -template class SimplifyAnakinDetectionPatternPass<1>; -template class SimplifyAnakinDetectionPatternPass<2>; -template class SimplifyAnakinDetectionPatternPass<3>; -template class SimplifyAnakinDetectionPatternPass<4>; -template class SimplifyAnakinDetectionPatternPass<5>; -template class SimplifyAnakinDetectionPatternPass<6>; +void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const { + const int pattern_nums = 6; + const std::string pattern_name = "simplify_anakin_detection_pattern_pass"; + FusePassBase::Init(pattern_name, graph); + std::vector options = {true, false}; + for (const auto &is_density : options) { + for (const auto &is_reshape : options) { + for (int i = 1; i <= pattern_nums; i++) { + RunSimplifyAnakinDetection(graph, i, is_density, is_reshape); + } + } + } +} } // namespace ir } // namespace framework } // namespace paddle -REGISTER_PASS(simplify_anakin_detection_pattern_pass, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>); - -REGISTER_PASS(simplify_anakin_detection_pattern_pass2, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<2>); - -REGISTER_PASS(simplify_anakin_detection_pattern_pass3, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>); - -REGISTER_PASS(simplify_anakin_detection_pattern_pass4, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>); - -REGISTER_PASS(simplify_anakin_detection_pattern_pass5, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>); - -REGISTER_PASS(simplify_anakin_detection_pattern_pass6, - paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>); +typedef paddle::framework::ir::SimplifyAnakinDetectionPatternPass + priorbox_pattern; +REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern); diff --git a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h similarity index 98% rename from paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h rename to paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h index e4a266cbe843ac56a8c0e4fb1e6f166afea6bfac..e882b9dc252e61a2e9e4e3666de49b7eee6d714a 100644 --- a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h +++ b/paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h @@ -26,7 +26,6 @@ namespace ir { // these structures will be used as inputs to the concat Op. This pattern will // be detected by our pass. The times here represents the repeat times of this // structure. -template class SimplifyAnakinDetectionPatternPass : public FusePassBase { public: virtual ~SimplifyAnakinDetectionPatternPass() {} diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc index 61c12d4b6e76bf3021a92aa99953df626b0e45e7..a984a4942b374c3e2c5f148f8147c55d0f5deb24 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc @@ -25,11 +25,9 @@ namespace paddle { namespace framework { namespace ir { -template -void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const { +void RunTransposeFlattenConcatFuse(ir::Graph *graph, int times) { const std::string pattern_name = "transpose_flatten" + std::to_string(times) + "_concat_fuse"; - FusePassBase::Init(pattern_name, graph); GraphPatternDetector gpd; std::vector input_nodes; @@ -122,31 +120,18 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const { gpd(graph, handler); } -template class TransposeFlattenConcatFusePass<1>; -template class TransposeFlattenConcatFusePass<2>; -template class TransposeFlattenConcatFusePass<3>; -template class TransposeFlattenConcatFusePass<4>; -template class TransposeFlattenConcatFusePass<5>; -template class TransposeFlattenConcatFusePass<6>; +void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const { + const int pattern_nums = 6; + const std::string pattern_name = "transpose_flatten_concat_fuse"; + FusePassBase::Init(pattern_name, graph); + for (int i = 1; i <= pattern_nums; i++) { + RunTransposeFlattenConcatFuse(graph, i); + } +} } // namespace ir } // namespace framework } // namespace paddle REGISTER_PASS(transpose_flatten_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<1>); - -REGISTER_PASS(transpose_flatten2_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<2>); - -REGISTER_PASS(transpose_flatten3_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<3>); - -REGISTER_PASS(transpose_flatten4_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<4>); - -REGISTER_PASS(transpose_flatten5_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<5>); - -REGISTER_PASS(transpose_flatten6_concat_fuse_pass, - paddle::framework::ir::TransposeFlattenConcatFusePass<6>); + paddle::framework::ir::TransposeFlattenConcatFusePass); diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h index 366d26d800c9899c455a3699f3f73f6e481aa0e0..939a8c31e5501e23968f9b44b4fe09e78280fd07 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h @@ -13,6 +13,8 @@ // limitations under the License. #pragma once +#include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -24,7 +26,6 @@ namespace ir { // these structures will be used as inputs to the concat Op. This pattern will // be detected by our pass. The times here represents the repeat times of this // structure. -template class TransposeFlattenConcatFusePass : public FusePassBase { public: virtual ~TransposeFlattenConcatFusePass() {} diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index b0ac73f9f52076a9303417bc1b19208ba6e6f2ec..168f287a455c644695b6eaff426ce31ded8d38a5 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -56,8 +56,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) { } } -static DDim GetDims(const Scope& scope, const std::string& name, - bool get_actual_dim = false) { +static DDim GetDimsDebug(const Scope& scope, const std::string& name, + bool get_actual_dim = false) { Variable* var = scope.FindVar(name); if (var == nullptr) { return DDim({-1}); @@ -65,9 +65,9 @@ static DDim GetDims(const Scope& scope, const std::string& name, if (var->IsType()) { const LoDTensor& tensor = var->Get(); - // if (UNLIKELY(!tensor.IsInitialized())) { - // return DDim({-1}); - // } + if (UNLIKELY(!tensor.IsInitialized())) { + return DDim({-1}); + } return tensor.dims(); } else if (var->IsType()) { if (get_actual_dim) { @@ -123,7 +123,7 @@ static int GetRowSize(const Scope& scope, const std::string& name) { return -1; } -static LoD GetLoD(const Scope& scope, const std::string& name) { +static LoD GetLoDDebug(const Scope& scope, const std::string& name) { Variable* var = scope.FindVar(name); auto default_lod = LoD({{}}); @@ -133,9 +133,9 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { if (var->IsType()) { const LoDTensor& tensor = var->Get(); - // if (UNLIKELY(!tensor.IsInitialized())) { - // return default_lod; - // } + if (UNLIKELY(!tensor.IsInitialized())) { + return default_lod; + } return tensor.lod(); } else { return default_lod; @@ -274,8 +274,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { } std::string dtype = GetDtype(*scope, var_name); ss << ":" << dtype; - ss << "[" << GetDims(*scope, var_name, true) << "]"; - ss << "(" << GetLoD(*scope, var_name) << ")"; + ss << "[" << GetDimsDebug(*scope, var_name, true) << "]"; + ss << "(" << GetLoDDebug(*scope, var_name) << ")"; } } if (i != input.second.size() - 1) { @@ -305,8 +305,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { } std::string dtype = GetDtype(*scope, output.second[i]); ss << ":" << dtype; - ss << "[" << GetDims(*scope, var_name, true) << "]"; - ss << "(" << GetLoD(*scope, var_name) << ")"; + ss << "[" << GetDimsDebug(*scope, var_name, true) << "]"; + ss << "(" << GetLoDDebug(*scope, var_name) << ")"; } } if (i != output.second.size() - 1) { @@ -1017,7 +1017,7 @@ Scope* OperatorWithKernel::PrepareData( // of search key even though the set is empty. if (!no_buffer_ins.empty() && no_buffer_ins.count(var_name_item.first) > 0) { - VLOG(1) << "Skip scanning input " << var_name_item.first + VLOG(7) << "Skip scanning input " << var_name_item.first << " in Operator " << type_; continue; } diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc index a55c153f99a815c0e0092b69b8e181630aed16bf..35e02919aa70c211da5d4a5785a9833747d99ce2 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc @@ -34,25 +34,41 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, auto input_name = op_desc.Input("Input").front(); auto image_name = op_desc.Input("Image").front(); auto output_name = op_desc.Output("Boxes").front(); + auto op_type = op_desc.Type(); + auto op_name = op_type + ":" + op_desc.Output("Boxes").front(); - auto op_name = op_desc.Type() + ":" + op_desc.Output("Boxes").front(); + // only for density_prior_box + std::vector fixed_sizes = {}; + std::vector fixed_ratios = {}; + std::vector densities = {}; - auto fixed_sizes = - boost::get>(op_desc.GetAttr("fixed_sizes")); - auto fixed_ratios = - boost::get>(op_desc.GetAttr("fixed_ratios")); - auto densities = boost::get>(op_desc.GetAttr("densities")); + std::vector min_sizes = {}; + std::vector max_sizes = {}; + std::vector aspect_ratios = {}; + bool is_clip = false; + bool is_flip = false; + + if (op_type == "density_prior_box") { + fixed_sizes = + boost::get>(op_desc.GetAttr("fixed_sizes")); + fixed_ratios = + boost::get>(op_desc.GetAttr("fixed_ratios")); + densities = boost::get>(op_desc.GetAttr("densities")); + is_clip = boost::get(op_desc.GetAttr("clip")); + } else if (op_type == "prior_box") { + min_sizes = boost::get>(op_desc.GetAttr("min_sizes")); + max_sizes = boost::get>(op_desc.GetAttr("max_sizes")); + aspect_ratios = + boost::get>(op_desc.GetAttr("aspect_ratios")); + is_clip = boost::get(op_desc.GetAttr("clip")); + is_flip = boost::get(op_desc.GetAttr("flip")); + } std::vector dens; for (auto& ele : densities) { dens.push_back(static_cast(ele)); } - // lack flip - // auto clip = boost::get(op_desc.GetAttr("clip")); auto variances = boost::get>(op_desc.GetAttr("variances")); - for (auto& ele : variances) { - LOG(INFO) << ele; - } // lack img_h, img_w auto step_h = boost::get(op_desc.GetAttr("step_h")); @@ -66,14 +82,14 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, std::vector temp_v = {}; engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name}); - engine_->AddOpAttr>(op_name, "min_size", temp_v); - engine_->AddOpAttr>(op_name, "max_size", temp_v); - engine_->AddOpAttr>(op_name, "aspect_ratio", temp_v); + engine_->AddOpAttr>(op_name, "min_size", min_sizes); + engine_->AddOpAttr>(op_name, "max_size", max_sizes); + engine_->AddOpAttr>(op_name, "aspect_ratio", aspect_ratios); engine_->AddOpAttr>(op_name, "fixed_size", fixed_sizes); engine_->AddOpAttr>(op_name, "fixed_ratio", fixed_ratios); engine_->AddOpAttr>(op_name, "density", dens); - engine_->AddOpAttr(op_name, "is_flip", static_cast(false)); - engine_->AddOpAttr(op_name, "is_clip", static_cast(false)); + engine_->AddOpAttr(op_name, "is_flip", is_flip); + engine_->AddOpAttr(op_name, "is_clip", is_clip); engine_->AddOpAttr>(op_name, "variance", variances); engine_->AddOpAttr(op_name, "img_h", static_cast(0)); engine_->AddOpAttr(op_name, "img_w", static_cast(0)); @@ -88,3 +104,4 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, } // namespace paddle REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index 4603681e1e8a3c2841a62cc88b49a84950910e73..45db4221747128cd7f6d26c8830fa75ebf81ac72 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -48,7 +48,7 @@ class AnakinOpConverter { framework::OpDesc op_desc(op, nullptr); std::string op_type = op_desc.Type(); AnakinOpConverter *it = nullptr; - + if (op_type == "depthwise_conv2d") op_type = "conv2d"; if (op_type == "reshape2") op_type = "reshape"; if (op_type == "transpose2") op_type = "transpose"; if (op_type == "flatten2") op_type = "flatten"; diff --git a/paddle/fluid/inference/anakin/op_teller.cc b/paddle/fluid/inference/anakin/op_teller.cc index 90cf021de2f9d365fd1fa21f7d189d3fcd9d3ab2..2042fb18ea41f8b41fc35543c7e1b642c4f2fa7c 100644 --- a/paddle/fluid/inference/anakin/op_teller.cc +++ b/paddle/fluid/inference/anakin/op_teller.cc @@ -42,6 +42,8 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("dropout"); teller_set.insert("sigmoid"); teller_set.insert("sum"); + teller_set.insert("depthwise_conv2d"); + teller_set.insert("prior_box"); } bool operator()(const std::string& op_type, diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc index 9e05aa5c16186d67200c4630619cc53fa241aa1b..38612d5cc3d093885144f3b1cd6107232885b645 100644 --- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -37,14 +37,14 @@ using framework::ir::Node; void analysis::AnakinSubgraphPass::ApplyImpl( framework::ir::Graph *graph) const { - framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get()); + framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph); auto teller = [](const framework::ir::Node *node) { if (!node->IsOp() || !node->Op()) return false; return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); }; - SubGraphFuser fuser(graph.get(), teller, 6 /* min_subgraph_size */); + SubGraphFuser fuser(graph, teller, 6 /* min_subgraph_size */); fuser(); std::vector graph_param_names = @@ -56,10 +56,10 @@ void analysis::AnakinSubgraphPass::ApplyImpl( for (auto *node : graph->Nodes()) { if (node->IsOp() && !Agent(node).subgraph()->empty()) { - CreateAnakinOp(node, graph.get(), graph_param_names, &repetitive_params); + CreateAnakinOp(node, graph, graph_param_names, &repetitive_params); std::unordered_set nodes2remove( Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); - framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + framework::ir::GraphSafeRemoveNodes(graph, nodes2remove); } } @@ -69,7 +69,7 @@ void analysis::AnakinSubgraphPass::ApplyImpl( nodes2remove.insert(node); } } - framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + framework::ir::GraphSafeRemoveNodes(graph, nodes2remove); graph->Set(framework::ir::kRepetitiveParamAttr, new std::vector(repetitive_params)); } diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index ef5872c52c6a1b3f3ade40ea43e78e2120fa6643..019098a5dd0d372a690955698a2ab6a4039a2416 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -192,6 +192,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp( block_desc.Proto()->SerializeAsString()); SetAttr(op_desc->Proto(), "max_batch_size", Get("max_batch_size")); SetAttr(op_desc->Proto(), "workspace_size", Get("workspace_size")); + SetAttr(op_desc->Proto(), "gpu_id", Get("gpu_device_id")); SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); SetAttr(op_desc->Proto(), "parameters", params); diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index d13ec7608c3e8075c1ef62fd4d47fbeee06e9005..1f27e80cf49f49863cf000d71369512242afb7b4 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -52,6 +52,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { for (auto &var_name : all_vars) { if (std::count(repetitive_params.begin(), repetitive_params.end(), var_name)) { + scope->EraseVars({var_name}); continue; } auto *var = scope->FindLocalVar(var_name); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index f7260561547bb0bd7aea1590239e38090953f6fc..7d8e9fe8bfada743388afd3ae4eedb5d84961706 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -886,4 +886,5 @@ USE_ANAKIN_CONVERTER(detection_out); USE_ANAKIN_CONVERTER(density_prior_box); USE_ANAKIN_CONVERTER(dropout); USE_ANAKIN_CONVERTER(sum); +USE_ANAKIN_CONVERTER(prior_box); #endif diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 8ec32b3a0b7fe459518e269fc72b182bc168435f..1d1d39e44096b9f50e5bc9603fa12aba92b0e8e2 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -70,17 +70,15 @@ void GpuPassStrategy::EnableMKLDNN() { // The following passes works for Anakin sub-graph engine. const std::vector kAnakinSubgraphPasses({ - "infer_clean_graph_pass", // - "simplify_anakin_detection_pattern_pass5", // - "simplify_anakin_detection_pattern_pass4", // - "simplify_anakin_detection_pattern_pass3", // - "simplify_anakin_detection_pattern_pass2", // - "anakin_fillconstant_elementwisemul_fuse", // - "fc_fuse_pass", // - "conv_elementwise_add_fuse_pass", // - "conv_bn_fuse_pass", // - "conv_elementwise_add_fuse_pass", // - "fc_gru_fuse_pass", // + "infer_clean_graph_pass", // + "simplify_anakin_priorbox_detection_out_pass", // + "fillconstant_elementwisemul_fuse", // + "fc_fuse_pass", // + "conv_elementwise_add_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_elementwise_add_fuse_pass", // + "fc_gru_fuse_pass", // + "quant_conv2d_dequant_fuse_pass", // "anakin_subgraph_pass", }); @@ -97,13 +95,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "conv_elementwise_add2_act_fuse_pass", // "conv_elementwise_add_fuse_pass", // "runtime_context_cache_pass", // -#endif +#endif // + "transpose_flatten_concat_fuse_pass", }); - for (int i = 6; i >= 2; i--) { - passes_.push_back("transpose_flatten" + std::to_string(i) + - "_concat_fuse_pass"); - } use_gpu_ = true; } diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 2f17a44e0c08ef7d9204a115512a1cd76790efdf..6a31185b097bc0ddf93a6e32e61ac0a9f2d04cfd 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -23,6 +23,12 @@ function(inference_analysis_api_test target install_dir filename) ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt) endfunction() +function(inference_analysis_api_int8_test target model_dir data_dir filename) + inference_analysis_test(${target} SRCS ${filename} + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark + ARGS --infer_model=${model_dir}/model --infer_data=${data_dir}/data.bin --batch_size=100) +endfunction() + function(inference_analysis_api_test_with_fake_data target install_dir filename model_name) download_model(${install_dir} ${model_name}) inference_analysis_test(${target} SRCS ${filename} @@ -138,6 +144,28 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL) +# int8 image classification tests +if(WITH_MKLDNN) + set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8") + if (NOT EXISTS ${INT8_DATA_DIR}) + inference_download_and_uncompress(${INT8_DATA_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "imagenet_val_100.tar.gz") + endif() + + #resnet50 int8 + set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50") + if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR}) + inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "resnet50_int8_model.tar.gz" ) + endif() + inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL) + + #mobilenet int8 + set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet") + if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR}) + inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "mobilenetv1_int8_model.tar.gz" ) + endif() + inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL) +endif() + # bert, max_len=20, embedding_dim=128 set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128") download_model_and_data(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc index f646fd6d91c81b6738e4fc5278739307fa5f99b5..e73358d8827a40786beb05fad931267b0dd88f6b 100644 --- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc @@ -53,19 +53,6 @@ void Split(const std::string &line, char sep, std::vector *v) { } } -template -constexpr paddle::PaddleDType GetPaddleDType(); - -template <> -constexpr paddle::PaddleDType GetPaddleDType() { - return paddle::PaddleDType::INT64; -} - -template <> -constexpr paddle::PaddleDType GetPaddleDType() { - return paddle::PaddleDType::FLOAT32; -} - // Parse tensor from string template bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) { diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..5a4f9a31a164a8fca3f80ce2fe2e6065fd04b340 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc @@ -0,0 +1,169 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/inference/api/paddle_analysis_config.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +DEFINE_int32(iterations, 0, "Number of iterations"); + +namespace paddle { +namespace inference { +namespace analysis { + +void SetConfig(AnalysisConfig *cfg) { + cfg->SetModel(FLAGS_infer_model); + cfg->SetProgFile("__model__"); + cfg->DisableGpu(); + cfg->SwitchIrOptim(); + cfg->SwitchSpecifyInputNames(false); + cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads); + + cfg->EnableMKLDNN(); +} + +template +class TensorReader { + public: + TensorReader(std::ifstream &file, size_t beginning_offset, + std::vector shape, std::string name) + : file_(file), position(beginning_offset), shape_(shape), name_(name) { + numel = + std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); + } + + PaddleTensor NextBatch() { + PaddleTensor tensor; + tensor.name = name_; + tensor.shape = shape_; + tensor.dtype = GetPaddleDType(); + tensor.data.Resize(numel * sizeof(T)); + + file_.seekg(position); + file_.read(static_cast(tensor.data.data()), numel * sizeof(T)); + position = file_.tellg(); + + if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream"; + if (file_.fail()) + throw std::runtime_error(name_ + ": failed reading file."); + + return tensor; + } + + protected: + std::ifstream &file_; + size_t position; + std::vector shape_; + std::string name_; + size_t numel; +}; + +std::shared_ptr> GetWarmupData( + const std::vector> &test_data, int num_images) { + int test_data_batch_size = test_data[0][0].shape[0]; + CHECK_LE(static_cast(num_images), + test_data.size() * test_data_batch_size); + + PaddleTensor images; + images.name = "input"; + images.shape = {num_images, 3, 224, 224}; + images.dtype = PaddleDType::FLOAT32; + images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224); + + PaddleTensor labels; + labels.name = "labels"; + labels.shape = {num_images, 1}; + labels.dtype = PaddleDType::INT64; + labels.data.Resize(sizeof(int64_t) * num_images); + + for (int i = 0; i < num_images; i++) { + auto batch = i / test_data_batch_size; + auto element_in_batch = i % test_data_batch_size; + std::copy_n(static_cast(test_data[batch][0].data.data()) + + element_in_batch * 3 * 224 * 224, + 3 * 224 * 224, + static_cast(images.data.data()) + i * 3 * 224 * 224); + + std::copy_n(static_cast(test_data[batch][1].data.data()) + + element_in_batch, + 1, static_cast(labels.data.data()) + i); + } + + auto warmup_data = std::make_shared>(2); + (*warmup_data)[0] = std::move(images); + (*warmup_data)[1] = std::move(labels); + return warmup_data; +} + +void SetInput(std::vector> *inputs, + int32_t batch_size = FLAGS_batch_size) { + std::ifstream file(FLAGS_infer_data, std::ios::binary); + if (!file) { + FAIL() << "Couldn't open file: " << FLAGS_infer_data; + } + + int64_t total_images{0}; + file.read(reinterpret_cast(&total_images), sizeof(total_images)); + LOG(INFO) << "Total images in file: " << total_images; + + std::vector image_batch_shape{batch_size, 3, 224, 224}; + std::vector label_batch_shape{batch_size, 1}; + auto labels_offset_in_file = + static_cast(file.tellg()) + + sizeof(float) * total_images * + std::accumulate(image_batch_shape.begin() + 1, + image_batch_shape.end(), 1, std::multiplies()); + + TensorReader image_reader(file, 0, image_batch_shape, "input"); + TensorReader label_reader(file, labels_offset_in_file, + label_batch_shape, "label"); + + auto iterations = total_images / batch_size; + if (FLAGS_iterations > 0 && FLAGS_iterations < iterations) + iterations = FLAGS_iterations; + for (auto i = 0; i < iterations; i++) { + auto images = image_reader.NextBatch(); + auto labels = label_reader.NextBatch(); + inputs->emplace_back( + std::vector{std::move(images), std::move(labels)}); + } +} + +TEST(Analyzer_int8_resnet50, quantization) { + AnalysisConfig cfg; + SetConfig(&cfg); + + AnalysisConfig q_cfg; + SetConfig(&q_cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all, 100); + + std::shared_ptr> warmup_data = + GetWarmupData(input_slots_all, 100); + + q_cfg.EnableMkldnnQuantizer(); + q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data); + q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100); + + CompareQuantizedAndAnalysis( + reinterpret_cast(&cfg), + reinterpret_cast(&q_cfg), + input_slots_all); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..4d968c83d9c9bf9d947204d73f4460e62039cdda --- /dev/null +++ b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py @@ -0,0 +1,162 @@ +# copyright (c) 2019 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. +import unittest +import os +import numpy as np +import time +import sys +import random +import functools +import contextlib +from PIL import Image, ImageEnhance +import math +from paddle.dataset.common import download + +random.seed(0) +np.random.seed(0) + +DATA_DIM = 224 + +SIZE_FLOAT32 = 4 +SIZE_INT64 = 8 + +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img_path, mode, color_jitter, rotate): + img = Image.open(img_path) + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def download_unzip(): + int8_download = 'int8/download' + + target_name = 'data' + + cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + + int8_download) + + target_folder = os.path.join(cache_folder, target_name) + + data_urls = [] + data_md5s = [] + + data_urls.append( + 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa' + ) + data_md5s.append('60f6525b0e1d127f345641d75d41f0a8') + data_urls.append( + 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab' + ) + data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5') + + file_names = [] + + for i in range(0, len(data_urls)): + download(data_urls[i], cache_folder, data_md5s[i]) + file_names.append(data_urls[i].split('/')[-1]) + + zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz') + + if not os.path.exists(zip_path): + cat_command = 'cat' + for file_name in file_names: + cat_command += ' ' + os.path.join(cache_folder, file_name) + cat_command += ' > ' + zip_path + os.system(cat_command) + print('Data is downloaded at {0}\n').format(zip_path) + + if not os.path.exists(target_folder): + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, zip_path) + os.system(cmd) + print('Data is unzipped at {0}\n'.format(target_folder)) + + data_dir = os.path.join(target_folder, 'ILSVRC2012') + print('ILSVRC2012 full val set at {0}\n'.format(data_dir)) + return data_dir + + +def reader(): + data_dir = download_unzip() + file_list = os.path.join(data_dir, 'val_list.txt') + output_file = os.path.join(data_dir, 'int8_full_val.bin') + with open(file_list) as flist: + lines = [line.strip() for line in flist] + num_images = len(lines) + if not os.path.exists(output_file): + print( + 'Preprocessing to binary file......\n' + ) + with open(output_file, "w+b") as of: + #save num_images(int64_t) to file + of.seek(0) + num = np.array(int(num_images)).astype('int64') + of.write(num.tobytes()) + for idx, line in enumerate(lines): + img_path, label = line.split() + img_path = os.path.join(data_dir, img_path) + if not os.path.exists(img_path): + continue + + #save image(float32) to file + img = process_image( + img_path, 'val', color_jitter=False, rotate=False) + np_img = np.array(img) + of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3 + * idx) + of.write(np_img.astype('float32').tobytes()) + + #save label(int64_t) to file + label_int = (int)(label) + np_label = np.array(label_int) + of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3 + * num_images + idx * SIZE_INT64) + of.write(np_label.astype('int64').tobytes()) + + print('The preprocessed binary file path {}\n'.format(output_file)) + + +if __name__ == '__main__': + reader() diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index a4881afe58a03902556ddb8a057c5f0579e4d1d2..33f1d0254858814be20eee1a6c2faaf00c2e8178 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -50,6 +50,7 @@ DEFINE_bool(use_analysis, true, DEFINE_bool(record_benchmark, false, "Record benchmark after profiling the model"); DEFINE_double(accuracy, 1e-3, "Result Accuracy."); +DEFINE_double(quantized_accuracy, 1e-2, "Result Quantized Accuracy."); DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch."); DECLARE_bool(profile); @@ -58,6 +59,19 @@ DECLARE_int32(paddle_num_threads); namespace paddle { namespace inference { +template +constexpr paddle::PaddleDType GetPaddleDType(); + +template <> +constexpr paddle::PaddleDType GetPaddleDType() { + return paddle::PaddleDType::INT64; +} + +template <> +constexpr paddle::PaddleDType GetPaddleDType() { + return paddle::PaddleDType::FLOAT32; +} + void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) { const auto *analysis_config = reinterpret_cast(config); @@ -392,6 +406,32 @@ void TestPrediction(const PaddlePredictor::Config *config, } } +void CompareTopAccuracy(const std::vector &output_slots1, + const std::vector &output_slots2) { + // first output: avg_cost + if (output_slots1.size() == 0 || output_slots2.size() == 0) + throw std::invalid_argument( + "CompareTopAccuracy: output_slots vector is empty."); + PADDLE_ENFORCE(output_slots1.size() >= 2UL); + PADDLE_ENFORCE(output_slots2.size() >= 2UL); + + // second output: acc_top1 + if (output_slots1[1].lod.size() > 0 || output_slots2[1].lod.size() > 0) + throw std::invalid_argument( + "CompareTopAccuracy: top1 accuracy output has nonempty LoD."); + if (output_slots1[1].dtype != paddle::PaddleDType::FLOAT32 || + output_slots2[1].dtype != paddle::PaddleDType::FLOAT32) + throw std::invalid_argument( + "CompareTopAccuracy: top1 accuracy output is of a wrong type."); + float *top1_quantized = static_cast(output_slots1[1].data.data()); + float *top1_reference = static_cast(output_slots2[1].data.data()); + LOG(INFO) << "top1 INT8 accuracy: " << *top1_quantized; + LOG(INFO) << "top1 FP32 accuracy: " << *top1_reference; + LOG(INFO) << "Accepted accuracy drop threshold: " << FLAGS_quantized_accuracy; + CHECK_LE(std::abs(*top1_quantized - *top1_reference), + FLAGS_quantized_accuracy); +} + void CompareDeterministic( const PaddlePredictor::Config *config, const std::vector> &inputs) { @@ -421,6 +461,17 @@ void CompareNativeAndAnalysis( CompareResult(analysis_outputs, native_outputs); } +void CompareQuantizedAndAnalysis( + const PaddlePredictor::Config *config, + const PaddlePredictor::Config *qconfig, + const std::vector> &inputs) { + PrintConfig(config, true); + std::vector analysis_outputs, quantized_outputs; + TestOneThreadPrediction(config, inputs, &analysis_outputs, true); + TestOneThreadPrediction(qconfig, inputs, &quantized_outputs, true); + CompareTopAccuracy(quantized_outputs, analysis_outputs); +} + void CompareNativeAndAnalysis( PaddlePredictor *native_pred, PaddlePredictor *analysis_pred, const std::vector> &inputs) { diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index 36d297ec5523b9e8a136c536165bdb4d3a380c25..f8baf082597d6152257e2ea74f14b6903a7be332 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -23,6 +23,16 @@ limitations under the License. */ #include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/float16.h" +// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in +// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT +// and CUDNN_DATA_HALF data types, compute capability 6.0 or higher. The +// reason we set it to false by default is that this mode may use scaled +// atomic integer reduction that may cause a numerical overflow for certain +// input data range. +DEFINE_bool(cudnn_batchnorm_spatial_persistent, false, + "Whether enable CUDNN_BATCHNORM_SPATIAL_PERSISTENT mode for cudnn " + "batch_norm, defalut is False."); + namespace paddle { namespace operators { @@ -76,7 +86,11 @@ class BatchNormKernel } epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); #if CUDNN_VERSION_MIN(7, 0, 0) - mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + if (FLAGS_cudnn_batchnorm_spatial_persistent) { + mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + } else { + mode_ = CUDNN_BATCHNORM_SPATIAL; + } #else mode_ = CUDNN_BATCHNORM_SPATIAL; #endif @@ -302,7 +316,11 @@ class BatchNormGradKernel } epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); #if CUDNN_VERSION_MIN(7, 0, 0) - mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + if (FLAGS_cudnn_batchnorm_spatial_persistent) { + mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + } else { + mode_ = CUDNN_BATCHNORM_SPATIAL; + } #else mode_ = CUDNN_BATCHNORM_SPATIAL; #endif diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index fbb04a166ef52efd9bd05f27ca656d928d97fb96..9ff1fe478d7f292e9b956c49920b016318db1c38 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -386,7 +386,7 @@ void BenchKernelSoftmax() { RandomVec(bs * n, x.mutable_data(PlaceType()), -2.f, 2.f); const T* x_data = x.data(); T* y_data = y.mutable_data(PlaceType()); - BenchAllImpls(n, x_data, y_data, n, bs); + BenchAllImpls(n, x_data, y_data, n, bs, 1); } } } diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index eb1c410b6f9a31c3f97a274c5e5ff55bf1c32ea0..f868c847bd80e874da2d2babde58129122e0bc70 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -34,6 +34,7 @@ const char* to_string(KernelType kt) { ONE_CASE(kVAddRelu); ONE_CASE(kVSub); ONE_CASE(kVScal); + ONE_CASE(kStrideScal); ONE_CASE(kVAddBias); ONE_CASE(kVRelu); ONE_CASE(kVBroadcast); @@ -55,6 +56,7 @@ const char* to_string(KernelType kt) { ONE_CASE(kMatMul); ONE_CASE(kHMax); ONE_CASE(kHSum); + ONE_CASE(kStrideASum); ONE_CASE(kSoftmax); ONE_CASE(kEmbSeqPool); ONE_CASE(kSgd); diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index bd34d7dfc72a139e70983c56c3220bd01d572bcd..6e0393b820f3780940d37659a067a630a6a0ae2b 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -38,6 +38,8 @@ typedef enum { kNCHW16CMulNC, kSeqPool, kSoftmax, + kStrideASum, + kStrideScal, kVAdd, kVAddBias, kVAddRelu, @@ -74,6 +76,14 @@ struct XYZNTuple { template struct AXYNTuple : public XYZNTuple {}; +// a, x, y, n, stride +template +struct AXYNSTuple { + typedef T data_type; + typedef int attr_type; + typedef void (*func_type)(const T*, const T*, T*, int, int); +}; + // x, y, n template struct XYNTuple { @@ -86,6 +96,14 @@ struct XYNTuple { template struct XRNTuple : public XYNTuple {}; +// x, returned value, n, stride +template +struct XRNSTuple { + typedef T data_type; + typedef int attr_type; + typedef void (*func_type)(const T*, T*, int, int); +}; + #define DECLARE_KERNELTUPLE(kernel_tuple, type) \ template \ struct type##Tuple : public kernel_tuple { \ @@ -101,6 +119,8 @@ DECLARE_KERNELTUPLE(XYZNTuple, VSub); DECLARE_KERNELTUPLE(AXYNTuple, VScal); DECLARE_KERNELTUPLE(AXYNTuple, VAddBias); +DECLARE_KERNELTUPLE(AXYNSTuple, StrideScal); + DECLARE_KERNELTUPLE(XYNTuple, VRelu); DECLARE_KERNELTUPLE(XYNTuple, VIdentity); DECLARE_KERNELTUPLE(XYNTuple, VSquare); @@ -112,6 +132,8 @@ DECLARE_KERNELTUPLE(XYNTuple, VCopy); DECLARE_KERNELTUPLE(XRNTuple, HMax); DECLARE_KERNELTUPLE(XRNTuple, HSum); +DECLARE_KERNELTUPLE(XRNSTuple, StrideASum); + typedef struct { void* gates; // gates: x_ch, x_ih, x_fh, x_oh const void* ct_1; @@ -285,7 +307,7 @@ struct SoftmaxTuple { static constexpr KernelType kernel_type = kSoftmax; typedef T data_type; typedef int attr_type; - typedef void (*func_type)(const T*, T*, int, int); + typedef void (*func_type)(const T*, T*, int, int, int); }; // nChw16c = nChw16c .* NC diff --git a/paddle/fluid/operators/jit/more/mix/mix.cc b/paddle/fluid/operators/jit/more/mix/mix.cc index 6e709a16d232e2fa1a77e74e228b763fed4dd75b..f5b7bfff89825bfcd6cbe4b1008628d3e1093f4c 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.cc +++ b/paddle/fluid/operators/jit/more/mix/mix.cc @@ -50,10 +50,15 @@ void VTanh(const T* x, T* y, int n) { compute_addbias(&b, y, y, n); } -void Softmax(const T* x, T* y, int n, int bs) { +// remain is the product of dimension shapes after the axis dimension +void Softmax(const T* x, T* y, int n, int bs, int remain) { auto compute_hmax = KernelFuncs, CPUPlace>::Cache().At(n); auto compute_hsum = KernelFuncs, CPUPlace>::Cache().At(n); auto compute_vscal = KernelFuncs, CPUPlace>::Cache().At(n); + auto compute_strideasum = + KernelFuncs, CPUPlace>::Cache().At(n); + auto compute_stridescal = + KernelFuncs, CPUPlace>::Cache().At(n); auto compute_vaddbias = KernelFuncs, CPUPlace>::Cache().At(n); auto compute_vexp = KernelFuncs, CPUPlace>::Cache().At(n); @@ -64,9 +69,17 @@ void Softmax(const T* x, T* y, int n, int bs) { scalar = static_cast(0) - scalar; compute_vaddbias(&scalar, x, y, n); // x - max compute_vexp(y, y, n); - compute_hsum(y, &scalar, n); - scalar = static_cast(1) / scalar; - compute_vscal(&scalar, y, y, n); + if (remain == 1) { + compute_hsum(y, &scalar, n); + scalar = static_cast(1) / scalar; + compute_vscal(&scalar, y, y, n); + } else { + for (int j = 0; j < remain; ++j) { + compute_strideasum(&y[j], &scalar, n, remain); + scalar = static_cast(1) / scalar; + compute_stridescal(&scalar, &y[j], &y[j], n, remain); + } + } x += n; y += n; } diff --git a/paddle/fluid/operators/jit/more/mix/mix.h b/paddle/fluid/operators/jit/more/mix/mix.h index 994d485909c874a8a15418ad946c79a10265c748..035425317edca95bc574807fa029ff373a7e10b8 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.h +++ b/paddle/fluid/operators/jit/more/mix/mix.h @@ -26,7 +26,7 @@ using T = float; void VSigmoid(const T* x, T* y, int n); void VTanh(const T* x, T* y, int n); -void Softmax(const T* x, T* y, int n, int bs); +void Softmax(const T* x, T* y, int n, int bs, int remain); void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr); void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr); diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index f69417c370b653d93cce04a2248ad809168670da..56f1a62ad4e06807dace2a81156d92f6b02a14df 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -7,6 +7,7 @@ USE_JITKERNEL_MORE(kMatMul, mkl) USE_JITKERNEL_MORE(kVMul, mkl) USE_JITKERNEL_MORE(kVAdd, mkl) USE_JITKERNEL_MORE(kVScal, mkl) +USE_JITKERNEL_MORE(kStrideScal, mkl) USE_JITKERNEL_MORE(kVExp, mkl) USE_JITKERNEL_MORE(kVSquare, mkl) USE_JITKERNEL_MORE(kVCopy, mkl) diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index 4f600b38144f53798e3d4c66264fc5bfa671a4f7..75ebddb125989b121b62d42b50e896eccd392a71 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -78,6 +78,26 @@ void VScal(const double* a, const double* x, double* y, int n) { } } +template <> +void StrideScal(const float* a, const float* x, float* y, int n, + int stride) { + if (x == y) { + platform::dynload::cblas_sscal(n / stride, *a, y, stride); + } else { + refer::StrideScal(a, x, y, n, stride); + } +} + +template <> +void StrideScal(const double* a, const double* x, double* y, int n, + int stride) { + if (x == y) { + platform::dynload::cblas_dscal(n / stride, *a, y, stride); + } else { + refer::StrideScal(a, x, y, n, stride); + } +} + template <> void VExp(const float* x, float* y, int n) { platform::dynload::vsExp(n, x, y); @@ -128,6 +148,16 @@ void ASum(const double* x, double* res, int n) { res[0] = platform::dynload::cblas_dasum(n, x, 1); } +template <> +void StrideASum(const float* x, float* res, int n, int stride) { + res[0] = platform::dynload::cblas_sasum(n / stride, x, stride); +} + +template <> +void StrideASum(const double* x, double* res, int n, int stride) { + res[0] = platform::dynload::cblas_dasum(n / stride, x, stride); +} + // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 template <> bool VMulKernel::CanBeUsed(const int& d) const { @@ -144,6 +174,11 @@ bool VScalKernel::CanBeUsed(const int& d) const { return platform::MayIUse(platform::avx512f) && d > 512; } +template <> +bool StrideScalKernel::CanBeUsed(const int& d) const { + return true; +} + template <> bool VExpKernel::CanBeUsed(const int& d) const { return d > 7; @@ -235,6 +270,7 @@ bool SoftmaxKernel::CanBeUsed(const int& d) const { AWALYS_USE_ME_WITH_DOUBLE(VMul); AWALYS_USE_ME_WITH_DOUBLE(VAdd); AWALYS_USE_ME_WITH_DOUBLE(VScal); +AWALYS_USE_ME_WITH_DOUBLE(StrideScal); AWALYS_USE_ME_WITH_DOUBLE(VExp); AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); AWALYS_USE_ME_WITH_DOUBLE(VTanh); @@ -259,6 +295,7 @@ REGISTER_MKL_KERNEL(MatMul); REGISTER_MKL_KERNEL(VMul); REGISTER_MKL_KERNEL(VAdd); REGISTER_MKL_KERNEL(VScal); +REGISTER_MKL_KERNEL(StrideScal); REGISTER_MKL_KERNEL(VExp); REGISTER_MKL_KERNEL(VSquare); REGISTER_MKL_KERNEL(VCopy); diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index f51dca654cd3d93dcd396af7895aebf5ee915c22..b38cc107b8e3038e04db4ed809d647e9a20d45fc 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -129,7 +129,14 @@ template void ASum(const T* x, T* res, int n); template -void Softmax(const T* x, T* y, int n, int bs) { +void StrideASum(const T* x, T* res, int n, int stride); + +template +void StrideScal(const T* a, const T* x, T* y, int n, int stride); + +// remain is the product of dimension shapes after the axis dimension +template +void Softmax(const T* x, T* y, int n, int bs, int remain = 1) { std::vector entities(bs); for (int i = 0; i < bs; ++i) { entities[i] = x[i * n]; @@ -143,9 +150,17 @@ void Softmax(const T* x, T* y, int n, int bs) { VExp(y, y, n * bs); for (int i = 0; i < bs; ++i) { T sum; - ASum(&y[i * n], &sum, n); - sum = static_cast(1) / sum; - VScal(&sum, &y[i * n], &y[i * n], n); + if (remain == 1) { + ASum(&y[i * n], &sum, n); + sum = static_cast(1) / sum; + VScal(&sum, &y[i * n], &y[i * n], n); + } else { + for (int j = 0; j < remain; ++j) { + StrideASum(&y[i * n + j], &sum, n, remain); + sum = static_cast(1) / sum; + StrideScal(&sum, &y[i * n + j], &y[i * n + j], n, remain); + } + } } } @@ -193,6 +208,7 @@ DECLARE_MKL_KERNEL(VAdd); // AXYN DECLARE_MKL_KERNEL(VScal); +DECLARE_MKL_KERNEL(StrideScal); // XYN DECLARE_MKL_KERNEL(VExp); diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index ffab9c1457b932b3211e6aa75954bb1435f8e34c..7133f596620410d37ffe52a2ee92b7a9974bf1cc 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -12,6 +12,7 @@ USE_JITKERNEL_REFER(kVAdd) USE_JITKERNEL_REFER(kVAddRelu) USE_JITKERNEL_REFER(kVSub) USE_JITKERNEL_REFER(kVScal) +USE_JITKERNEL_REFER(kStrideScal) USE_JITKERNEL_REFER(kVAddBias) USE_JITKERNEL_REFER(kVCopy) USE_JITKERNEL_REFER(kVRelu) @@ -32,6 +33,7 @@ USE_JITKERNEL_REFER(kMatMul) USE_JITKERNEL_REFER(kVSquare) USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHMax) +USE_JITKERNEL_REFER(kStrideASum) USE_JITKERNEL_REFER(kSoftmax) USE_JITKERNEL_REFER(kEmbSeqPool) USE_JITKERNEL_REFER(kSgd) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index 0d1c4770903fc59160e308b958270e5826928d61..460cb6c58076d7f6c49b60fed45584bd9b506c63 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -27,6 +27,7 @@ REGISTER_REFER_KERNEL(VAddRelu); REGISTER_REFER_KERNEL(VSub); REGISTER_REFER_KERNEL(VScal); +REGISTER_REFER_KERNEL(StrideScal); REGISTER_REFER_KERNEL(VAddBias); REGISTER_REFER_KERNEL(VRelu); @@ -51,6 +52,7 @@ REGISTER_REFER_KERNEL(SeqPool); REGISTER_REFER_KERNEL(MatMul); REGISTER_REFER_KERNEL(HMax); REGISTER_REFER_KERNEL(HSum); +REGISTER_REFER_KERNEL(StrideASum); REGISTER_REFER_KERNEL(Softmax); REGISTER_REFER_KERNEL(EmbSeqPool); REGISTER_REFER_KERNEL(Sgd); diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index cac705a484127b4813ef2d0996bf5aaee2b9f1b3..136b99e0aeffec8e93e11c2e5e4f7bd35dd1c8d4 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -411,19 +411,47 @@ void HSum(const T* x, T* res, int n) { } } +template +void StrideASum(const T* x, T* res, int n, int stride) { + res[0] = x[0]; + for (int i = stride; i < n; i += stride) { + res[0] += std::abs(x[i]); + } +} + +template +void StrideScal(const T* a, const T* x, T* y, int n, int stride) { + for (int i = 0; i < n; ++i) { + if (i % stride == 0) { + y[i] = x[i] * a[0]; + } else { + y[i] = x[i]; + } + } +} + // y = e^(x - max(x)) // y = y / sum(y) +// remain is the product of dimension shapes after the axis dimension template -void Softmax(const T* x, T* y, int n, int bs = 1) { +void Softmax(const T* x, T* y, int n, int bs = 1, int remain = 1) { for (int i = 0; i < bs; ++i) { T scalar; HMax(x, &scalar, n); scalar = static_cast(0) - scalar; VAddBias(&scalar, x, y, n); // x - max VExp(y, y, n); - HSum(y, &scalar, n); - scalar = static_cast(1) / scalar; - VScal(&scalar, y, y, n); + if (remain == 1) { + HSum(y, &scalar, n); + scalar = static_cast(1) / scalar; + VScal(&scalar, y, y, n); + } else { + for (int j = 0; j < remain; j++) { + StrideASum(&y[j], &scalar, n, remain); + scalar = static_cast(1) / scalar; + StrideScal(&scalar, &y[j], &y[j], n, remain); + } + } x += n; y += n; } @@ -507,6 +535,9 @@ DECLARE_REFER_KERNEL(VSub); DECLARE_REFER_KERNEL(VScal); DECLARE_REFER_KERNEL(VAddBias); +// const T* a, const T* x, T* y, int n, int stride +DECLARE_REFER_KERNEL(StrideScal); + // const T* x, T* y, int n DECLARE_REFER_KERNEL(VRelu); DECLARE_REFER_KERNEL(VIdentity); @@ -528,6 +559,8 @@ DECLARE_REFER_KERNEL(GRUHtPart2); DECLARE_REFER_KERNEL(HMax); DECLARE_REFER_KERNEL(HSum); +DECLARE_REFER_KERNEL(StrideASum); + // others DECLARE_REFER_KERNEL(CRFDecoding); DECLARE_REFER_KERNEL(LayerNorm); diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 6c099a7a062472e2701401ddc58bb9051074f810..d30fa014ed5fbac9ed71f3185ce0443d33f4a281 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -723,39 +723,122 @@ void TestKernelSoftmax() { VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type); for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { + for (int m : {1, 2, 3}) { // remain + if (m > n || n % m != 0) { + continue; + } + auto ref = jit::GetReferFunc(); + EXPECT_TRUE(ref != nullptr); + std::vector x(bs * n), y(bs * n); + RandomVec(bs * n, x.data()); + const T* x_data = x.data(); + T* y_data = y.data(); + + std::vector xinp(x.size()); // inplace test + std::copy(x.begin(), x.end(), xinp.begin()); + ref(x_data, y_data, n, bs, m); + T* xinp_data = xinp.data(); + ref(xinp_data, xinp_data, n, bs, m); + ExpectEQ(xinp_data, y_data, n * bs); + + auto verifier = [](const typename KernelTuple::func_type tgt, + const std::vector& x, const std::vector& yref, + int n, int bs, int m) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(yref.size(), x.size()); + EXPECT_EQ(x.size(), static_cast(n * bs)); + const T* x_data = x.data(); + const T* yref_data = yref.data(); + std::vector ytgt(n * bs); + T* ytgt_data = ytgt.data(); + // test normal + tgt(x_data, ytgt_data, n, bs, m); + ExpectEQ(ytgt_data, yref_data, n * bs); + // test inplace x + std::copy(x.begin(), x.end(), ytgt.begin()); + tgt(ytgt_data, ytgt_data, n, bs, m); + ExpectEQ(ytgt_data, yref_data, n * bs); + }; + TestAllImpls(n, verifier, x, y, n, bs, m); + } + } + } +} + +template +void TestKernelStrideASum() { + using T = typename KernelTuple::data_type; + VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type); + for (int d : TestSizes()) { + for (int m : {1, 2, 3}) { // stride + if (m > d || d % m != 0) { + continue; + } + auto ref = jit::GetReferFunc(); + EXPECT_TRUE(ref != nullptr); + std::vector x(d); + RandomVec(d, x.data()); + T ref_res; + ref(x.data(), &ref_res, d, m); + + auto verifier = [](const typename KernelTuple::func_type tgt, + const std::vector& x, const T ref_res, + const int m) { + EXPECT_TRUE(tgt != nullptr); + T tgt_res; + tgt(x.data(), &tgt_res, x.size(), m); + ExpectEQ(&tgt_res, &ref_res, 1); + }; + TestAllImpls(d, verifier, x, ref_res, m); + } + } +} + +template +void TestKernelStrideScal() { + using T = typename KernelTuple::data_type; + VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type); + for (int d : TestSizes()) { + for (int m : {1, 2, 3}) { // stride + if (m > d || d % m != 0) { + continue; + } auto ref = jit::GetReferFunc(); EXPECT_TRUE(ref != nullptr); - std::vector x(bs * n), y(bs * n); - RandomVec(bs * n, x.data()); - const T* x_data = x.data(); - T* y_data = y.data(); - std::vector xinp(x.size()); // inplace test + const T a = static_cast(3); + std::vector x(d), yref(d); + std::vector xinp(d); // inplace test + RandomVec(d, x.data()); std::copy(x.begin(), x.end(), xinp.begin()); - ref(x_data, y_data, n, bs); + + const T* x_data = x.data(); + T* yref_data = yref.data(); T* xinp_data = xinp.data(); - ref(xinp_data, xinp_data, n, bs); - ExpectEQ(xinp_data, y_data, n * bs); + // test refer code inplace + ref(&a, x_data, yref_data, d, m); + ref(&a, xinp_data, xinp_data, d, m); + ExpectEQ(xinp_data, yref_data, d); - auto verifier = [](const typename KernelTuple::func_type tgt, + auto verifier = [](const typename KernelTuple::func_type tgt, const T a, const std::vector& x, const std::vector& yref, - int n, int bs) { + const int m) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(yref.size(), x.size()); - EXPECT_EQ(x.size(), static_cast(n * bs)); const T* x_data = x.data(); const T* yref_data = yref.data(); - std::vector ytgt(n * bs); + const int d = yref.size(); + std::vector ytgt(d); T* ytgt_data = ytgt.data(); // test normal - tgt(x_data, ytgt_data, n, bs); - ExpectEQ(ytgt_data, yref_data, n * bs); + tgt(&a, x_data, ytgt_data, d, m); + ExpectEQ(ytgt_data, yref_data, d); // test inplace x std::copy(x.begin(), x.end(), ytgt.begin()); - tgt(ytgt_data, ytgt_data, n, bs); - ExpectEQ(ytgt_data, yref_data, n * bs); + tgt(&a, ytgt_data, ytgt_data, d, m); + ExpectEQ(ytgt_data, yref_data, d); }; - TestAllImpls(n, verifier, x, y, n, bs); + TestAllImpls(d, verifier, a, x, yref, m); } } } @@ -912,7 +995,7 @@ TEST(JITKernel_pool, more) { EXPECT_EQ(kers.size(), 10UL); #else #ifdef PADDLE_WITH_MKLML - EXPECT_EQ(kers.size(), 21UL); + EXPECT_EQ(kers.size(), 22UL); #else EXPECT_EQ(kers.size(), 8UL); #endif @@ -921,7 +1004,7 @@ TEST(JITKernel_pool, more) { TEST(JITKernel_pool, refer) { const auto& kers = jit::ReferKernelPool::Instance().AllKernels(); - EXPECT_EQ(kers.size(), 29UL); + EXPECT_EQ(kers.size(), 31UL); } // test helper @@ -1292,3 +1375,6 @@ TEST_CPU_KERNEL(MatMul); TEST_CPU_KERNEL(Softmax); TEST_CPU_KERNEL(Sgd); TEST_CPU_KERNEL(VBroadcast); + +TEST_CPU_KERNEL(StrideASum); +TEST_CPU_KERNEL(StrideScal); diff --git a/paddle/fluid/operators/kldiv_loss_op.cc b/paddle/fluid/operators/kldiv_loss_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a43f22c0496f89943d2fd5110446f1aae6a99315 --- /dev/null +++ b/paddle/fluid/operators/kldiv_loss_op.cc @@ -0,0 +1,171 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/kldiv_loss_op.h" +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class KLDivLossOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of KLDivLossOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Target"), + "Input(Target) of KLDivLossOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Loss"), + "Output(Loss) of KLDivLossOp should not be null."); + + auto dim_x = ctx->GetInputDim("X"); + auto dim_target = ctx->GetInputDim("Target"); + PADDLE_ENFORCE_EQ(dim_x.size(), dim_target.size(), + "Input(X) rank and Input(Target) rank should be same."); + for (int i = 0; i < dim_x.size(); i++) { + PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i], + "Input(X) and Input(Target) should in same shape."); + } + + auto reduction = ctx->Attrs().Get("reduction"); + + PADDLE_ENFORCE( + "mean" == reduction || "sum" == reduction || "batchmean" == reduction || + "none" == reduction, + "Attr(reduction) can only be 'none'|'batchmean'|'sum'|'mean'."); + + if ("none" == reduction) { + ctx->SetOutputDim("Loss", dim_x); + } else { + ctx->SetOutputDim("Loss", {1}); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.GetPlace()); + } +}; + +class KLDivLossOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "The input tensor of KL divergence loss operator. " + "This is a tensor with shape of [N, *], where N is the " + "batch size, * means any number of additional dimensions."); + AddInput("Target", + "The tensor of KL divergence loss operator. " + "This is a tensor with shape of Input(X)."); + AddOutput( + "Loss", + "The output KL divergence loss tensor. if Attr(reduction) is " + "'none', this tensor should be in same shape of of Input(X), else " + "this tensor should be in shape of [1]."); + + AddAttr( + "reduction", + "The reduction type to apply to the output, available types " + "are 'none' | 'batchmean' | 'mean' | 'sum', 'none' for no " + "reduction, 'batchmean' for the sum of output divided by " + "batch size, 'mean' for the average value of all output, " + "'sum' for the sum of the output.") + .SetDefault("mean"); + + AddComment(R"DOC( + This operator calculates the Kullback-Leibler divergence loss + between Input(X) and Input(Target). + + KL divergence loss is calculated as follows: + + $$l(x, y) = y * (\log(y) - x)$$ + + While :math:`x` is Input(X) and :math:`y` is Input(Target). + + While :attr:`reduction` is :attr:`none`, output loss is in + the same shape as Input(X), loss in each point is calculated + seperately and no reduction is applied. + + While :attr:`reduction` is :attr:`mean`, output loss is in + shape of [1] and loss value is the mean value of all losses. + + While :attr:`reduction` is :attr:`sum`, output loss is in + shape of [1] and loss value is the sum value of all losses. + + While :attr:`reduction` is :attr:`batchmean`, output loss is + in shape of [1] and loss value is the sum value of all losses + divided by batch size. + + )DOC"); + } +}; + +class KLDivLossOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("Target"), "Input(Target) should not be null"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")), + "Input(Loss@GRAD) should not be null"); + auto dim_x = ctx->GetInputDim("X"); + if (ctx->HasOutput(framework::GradVarName("X"))) { + ctx->SetOutputDim(framework::GradVarName("X"), dim_x); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.GetPlace()); + } +}; + +class KLDivLossOpGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto* op = new framework::OpDesc(); + op->SetType("kldiv_loss_grad"); + op->SetInput("X", Input("X")); + op->SetInput("Target", Input("Target")); + op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss")); + + op->SetAttrMap(Attrs()); + + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + return std::unique_ptr(op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(kldiv_loss, ops::KLDivLossOp, ops::KLDivLossOpMaker, + ops::KLDivLossOpGradMaker); +REGISTER_OPERATOR(kldiv_loss_grad, ops::KLDivLossOpGrad); +REGISTER_OP_CPU_KERNEL( + kldiv_loss, ops::KLDivLossKernel, + ops::KLDivLossKernel); +REGISTER_OP_CPU_KERNEL( + kldiv_loss_grad, + ops::KLDivLossGradKernel, + ops::KLDivLossGradKernel); diff --git a/paddle/fluid/operators/kldiv_loss_op.cu b/paddle/fluid/operators/kldiv_loss_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..5226cb8c08e3db4a0bfbbe4440c27264903f06e3 --- /dev/null +++ b/paddle/fluid/operators/kldiv_loss_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/fluid/operators/kldiv_loss_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_CUDA_KERNEL( + kldiv_loss, + ops::KLDivLossKernel, + ops::KLDivLossKernel); +REGISTER_OP_CUDA_KERNEL( + kldiv_loss_grad, + ops::KLDivLossGradKernel, + ops::KLDivLossGradKernel); diff --git a/paddle/fluid/operators/kldiv_loss_op.h b/paddle/fluid/operators/kldiv_loss_op.h new file mode 100644 index 0000000000000000000000000000000000000000..625e16e298d9f842fa621aca727c6df2cb045301 --- /dev/null +++ b/paddle/fluid/operators/kldiv_loss_op.h @@ -0,0 +1,119 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/hostdevice.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + +using Array1 = Eigen::DSizes; + +template +struct KLDivLossForward { + HOSTDEVICE KLDivLossForward() {} + + HOSTDEVICE T operator()(const T& target, const T& input) const { + if (target <= 0) { + return 0; + } else { + return target * (std::log(target) - input); + } + } +}; + +template +struct KLDivLossBackward { + HOSTDEVICE KLDivLossBackward() {} + + HOSTDEVICE T operator()(const T& target, const T& grad) const { + if (target <= 0) { + return 0; + } else { + return static_cast(-1.) * grad; + } + } +}; + +template +class KLDivLossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& place = *ctx.template device_context().eigen_device(); + auto* input = ctx.Input("X"); + auto* target = ctx.Input("Target"); + auto* loss = ctx.Output("Loss"); + auto reduction = ctx.Attr("reduction"); + + const int n = input->dims()[0]; + + loss->mutable_data(ctx.GetPlace()); + auto input_t = EigenVector::Flatten(*input); + auto target_t = EigenVector::Flatten(*target); + auto loss_t = EigenVector::Flatten(*loss); + auto output = target_t.binaryExpr(input_t, KLDivLossForward()); + if ("none" == reduction) { + loss_t.device(place) = output; + } else if ("batchmean" == reduction) { + auto output_sum = output.sum().eval(); + loss_t.device(place) = output_sum / output_sum.constant(n); + } else if ("mean" == reduction) { + loss_t.device(place) = output.mean(); + } else if ("sum" == reduction) { + loss_t.device(place) = output.sum(); + } + } +}; + +template +class KLDivLossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& place = *ctx.template device_context().eigen_device(); + auto* target = ctx.Input("Target"); + auto reduction = ctx.Attr("reduction"); + auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); + + const int n = input_grad->dims()[0]; + const int numel = input_grad->numel(); + const int expand = numel / loss_grad->numel(); + + input_grad->mutable_data(ctx.GetPlace()); + + auto target_t = EigenVector::Flatten(*target); + + auto input_grad_t = EigenVector::Flatten(*input_grad); + auto loss_grad_t = EigenVector::Flatten(*loss_grad); + + auto loss_grad_expand = loss_grad_t.broadcast(Array1(expand)); + auto grad_t = target_t * loss_grad_expand; + input_grad_t.device(place) = + target_t.binaryExpr(grad_t, KLDivLossBackward()); + + if ("mean" == reduction) { + input_grad_t.device(place) = input_grad_t / static_cast(numel); + } else if ("batchmean" == reduction) { + input_grad_t.device(place) = input_grad_t / static_cast(n); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/softmax.h b/paddle/fluid/operators/math/softmax.h index 81beef56d9424b968932fdc4ca723099632c183a..a7a30a71e4cf176987cc75be1958a762a08b09ae 100644 --- a/paddle/fluid/operators/math/softmax.h +++ b/paddle/fluid/operators/math/softmax.h @@ -23,15 +23,16 @@ template class SoftmaxFunctor { public: - void operator()(const DeviceContext& context, const framework::Tensor* X, - framework::Tensor* Y); + void operator()(const DeviceContext& context, const int axis_dim, + const framework::Tensor* X, framework::Tensor* Y); }; template class SoftmaxGradFunctor { public: - void operator()(const DeviceContext& context, const framework::Tensor* y, - const framework::Tensor* y_grad, framework::Tensor* x_grad); + void operator()(const DeviceContext& context, const int axis_dim, + const framework::Tensor* y, const framework::Tensor* y_grad, + framework::Tensor* x_grad); }; #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h index d77b6712c548370a99e350b73ab86b170c0e17dc..6f6f33345f5336a8b8ff100c0286914ef629283f 100644 --- a/paddle/fluid/operators/math/softmax_impl.h +++ b/paddle/fluid/operators/math/softmax_impl.h @@ -36,8 +36,8 @@ struct ValueClip { template void SoftmaxFunctor::operator()( - const DeviceContext& context, const framework::Tensor* X, - framework::Tensor* Y) { + const DeviceContext& context, const int axis_dim, + const framework::Tensor* X, framework::Tensor* Y) { auto logits = EigenMatrix::From(*X); auto softmax = EigenMatrix::From(*Y); @@ -46,10 +46,13 @@ void SoftmaxFunctor::operator()( const int batch_size = logits.dimension(kBatchDim); const int num_classes = logits.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; Eigen::DSizes along_class(kClassDim); Eigen::DSizes batch_by_one(batch_size, 1); Eigen::DSizes one_by_class(1, num_classes); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + Eigen::DSizes one_axis(1, axis_dim); auto shifted_logits = (logits - logits.maximum(along_class) @@ -60,11 +63,11 @@ void SoftmaxFunctor::operator()( softmax.device(*context.eigen_device()) = shifted_logits.exp(); softmax.device(*context.eigen_device()) = (softmax * - softmax.sum(along_class) + softmax.reshape(batch_axis_remain) + .sum(along_class) .inverse() .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); + .broadcast(one_axis)); } template @@ -73,8 +76,8 @@ using enable_if_CPU = typename std::enable_if< template class SoftmaxFunctor> { - void operator()(const DeviceContext& context, const framework::Tensor* X, - framework::Tensor* Y) { + void operator()(const DeviceContext& context, const int axis_dim, + const framework::Tensor* X, framework::Tensor* Y) { auto in_dims = X->dims(); const float* in_data = X->data(); float* out_data = Y->data(); @@ -84,14 +87,16 @@ class SoftmaxFunctor> { auto compute_softmax = jit::KernelFuncs, platform::CPUPlace>::Cache() .At(in_dims[kClassDim]); - compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim]); + compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim], + in_dims[kClassDim] / axis_dim); } }; template void SoftmaxGradFunctor::operator()( - const DeviceContext& context, const framework::Tensor* y, - const framework::Tensor* y_grad, framework::Tensor* x_grad) { + const DeviceContext& context, const int axis_dim, + const framework::Tensor* y, const framework::Tensor* y_grad, + framework::Tensor* x_grad) { auto softmax = EigenMatrix::From(*y); auto softmax_grad = EigenMatrix::From(*y_grad); auto logits_grad = EigenMatrix::From(*x_grad); @@ -101,16 +106,19 @@ void SoftmaxGradFunctor::operator()( const int batch_size = softmax.dimension(kBatchDim); const int num_classes = softmax.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; Eigen::DSizes along_class(kClassDim); Eigen::DSizes batch_by_one(batch_size, 1); Eigen::DSizes one_by_class(1, num_classes); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + Eigen::DSizes one_axis(1, axis_dim); auto dot = (softmax * softmax_grad) + .reshape(batch_axis_remain) .sum(along_class) .eval() - .reshape(batch_by_one) - .broadcast(one_by_class); + .broadcast(one_axis); logits_grad.device(*context.eigen_device()) = (softmax_grad - dot) * softmax; } diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index db44bd394a2ce280c06274f728dcf95d266f94cf..1c2f5eae8d8dd88481aad0a7d7f86a588f5c480d 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -39,6 +39,20 @@ class SoftmaxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SoftmaxOp should not be null."); + auto dim_x = ctx->GetInputDim("X"); + auto rank_x = dim_x.size(); + auto axis = ctx->Attrs().Get("axis"); + PADDLE_ENFORCE(axis >= -rank_x && axis < rank_x, + "Attr(axis) value should be in range [-R, R-1], " + "R is the rank of Input(X)."); + + auto use_cudnn = ctx->Attrs().Get("use_cudnn"); + auto use_mkldnn = ctx->Attrs().Get("use_mkldnn"); + if (axis != rank_x - 1 && axis != -1) { + PADDLE_ENFORCE(!use_cudnn, "CUDNN kernel only support axis as -1."); + PADDLE_ENFORCE(!use_mkldnn, "MKLDNN kernel only support axis as -1."); + } + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -80,8 +94,12 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "The input tensor of softmax, " - "whose last dimension is the input_feature_dimensions."); + "whose dimension :attr:`axis` is the input_feature_dimensions."); AddOutput("Out", "The normalized values with the same shape as X."); + AddAttr("axis", + "The dimension index of Input(x) to perform softmax," + "default -1 for last dimension") + .SetDefault(-1); AddAttr( "use_cudnn", "(bool, default false) Only used in cudnn kernel, need install cudnn") @@ -106,12 +124,13 @@ Softmax Operator. The input of the softmax operator is a tensor of any rank. The output tensor has the same shape as the input. -The input tensor will first be logically flattened to a 2-D matrix. The matrix's -second dimension(row length) is as same as the last dimension of the input +The dimension :attr:`axis` of the input tensor will be permuted to the last. +Then the input tensor will be logically flattened to a 2-D matrix. The matrix's +second dimension(row length) is as same as the dimension :attr:`axis` of the input tensor, and the first dimension(column length) is the product of all other dimensions of the input tensor. For each row of the matrix, the softmax operator squashes the K-dimensional(K is the width of the matrix, which is also the size -of the input tensor's last dimension) vector of arbitrary real values to a +of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional vector of real values in the range [0, 1] that add up to 1. It computes the exponential of the given dimension and the sum of exponential values of all the other dimensions in the K-dimensional vector input. diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h index 91829d5761bfdd1f9806af6589a2967fe866fec8..a964c3b57a635b3e5f0a4c163e3b3c13d465102b 100644 --- a/paddle/fluid/operators/softmax_op.h +++ b/paddle/fluid/operators/softmax_op.h @@ -20,6 +20,30 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; +using DDim = framework::DDim; + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, DDim dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, DDim dims) { + int size = 1; + for (int i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} template class SoftmaxKernel : public framework::OpKernel { @@ -27,20 +51,27 @@ class SoftmaxKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); auto* Out = context.Output("Out"); + const int rank = X->dims().size(); + const int axis = CanonicalAxis(context.Attr("axis"), rank); + int axis_dim = X->dims()[axis]; // allocate memory on device. Out->mutable_data(context.GetPlace()); - int rank = X->dims().size(); - Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1); - Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); + const int n = SizeToAxis(axis, X->dims()); + const int d = SizeFromAxis(axis, X->dims()); + Tensor X_2d, Out_2d; + X_2d.ShareDataWith(*X).Resize({n, d}); + Out_2d.ShareDataWith(*Out).Resize({n, d}); #ifdef PADDLE_ON_INFERENCE math::SoftmaxFunctor()( - context.template device_context(), &X_2d, &Out_2d); + context.template device_context(), axis_dim, &X_2d, + &Out_2d); #else math::SoftmaxFunctor()( - context.template device_context(), &X_2d, &Out_2d); + context.template device_context(), axis_dim, &X_2d, + &Out_2d); #endif } }; @@ -52,18 +83,23 @@ class SoftmaxGradKernel : public framework::OpKernel { auto* Out = context.Input("Out"); auto* dOut = context.Input(framework::GradVarName("Out")); auto* dX = context.Output(framework::GradVarName("X")); + const int rank = dX->dims().size(); + const int axis = CanonicalAxis(context.Attr("axis"), rank); + int axis_dim = dX->dims()[axis]; // allocate memory on device. dX->mutable_data(context.GetPlace()); - int rank = Out->dims().size(); - Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); - Tensor dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1); - Tensor dX_2d = framework::ReshapeToMatrix(*dX, rank - 1); + const int n = SizeToAxis(axis, dX->dims()); + const int d = SizeFromAxis(axis, dX->dims()); + Tensor dX_2d, Out_2d, dOut_2d; + dX_2d.ShareDataWith(*dX).Resize({n, d}); + Out_2d.ShareDataWith(*Out).Resize({n, d}); + dOut_2d.ShareDataWith(*dOut).Resize({n, d}); math::SoftmaxGradFunctor()( - context.template device_context(), &Out_2d, &dOut_2d, - &dX_2d); + context.template device_context(), axis_dim, &Out_2d, + &dOut_2d, &dX_2d); } }; diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index c0530e3d8bc407ddd6d7bf6e10a715185d0beb1f..1042cbdcf5e96f0dd3780793cf1f233dc32c3eec 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -40,10 +40,12 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { softmax->mutable_data(context.GetPlace()); loss->mutable_data(context.GetPlace()); + int axis_dim = logits->dims()[logits->dims().size() - 1]; + auto& dev_ctx = context.template device_context(); math::SoftmaxFunctor()( - dev_ctx, logits, softmax); + dev_ctx, axis_dim, logits, softmax); math::CrossEntropyFunctor()( dev_ctx, loss, softmax, labels, context.Attr("soft_label"), context.Attr("ignore_index")); diff --git a/paddle/fluid/operators/temporal_shift_op.cc b/paddle/fluid/operators/temporal_shift_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7df649fc5b7bf8671303a28d727be1d85c1fa6e4 --- /dev/null +++ b/paddle/fluid/operators/temporal_shift_op.cc @@ -0,0 +1,155 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/temporal_shift_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class TemporalShiftOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of TemporalShiftOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of TemporalShiftOp should not be null."); + + auto dim_x = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(dim_x.size(), 4, + "Input(X) rank should be 4 in shape of [N*T, C, H, W]."); + + int seg_num = ctx->Attrs().Get("seg_num"); + float shift_ratio = ctx->Attrs().Get("shift_ratio"); + PADDLE_ENFORCE_GT(seg_num, 0, "Attr(seg_num) should be greater than 0."); + PADDLE_ENFORCE(shift_ratio > 0 || shift_ratio < .5, + "Attr(shift_ratio) should be greater than 0 and less " + "than 0.5."); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ( + dim_x[0] % seg_num, 0, + "Input(X) dims[0] should be divided exactly by Attr(seg_num)."); + } + + ctx->SetOutputDim("Out", dim_x); + ctx->ShareLoD("X", "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.GetPlace()); + } +}; + +class TemporalShiftOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "The input tensor of temporal shift operator. " + "This is a 4-D tensor with shape of [N*T, C, H, W]. " + "While N is the batch size, T is the temporal segment " + "number, C is the channel number, H is the height of " + "features and W is the width of features."); + AddOutput("Out", + "The output tensor of temporal shift operator. " + "This is a 4-D tensor in the same shape with Input(X)."); + + AddAttr("seg_num", + "The temporal segment number, this should be a positive " + "integer."); + AddAttr( + "shift_ratio", + "The shift ratio of the channels, the first :attr:`shift_ratio` part " + "of channels will be shifted by -1 along the temporal dimension, " + "and the second :attr:`shift_ratio` part of channels will be shifted " + "by 1 along the temporal dimension. Default 0.25.") + .SetDefault(0.25); + + AddComment(R"DOC( + This operator calculates the temporal shifting features for Input(X). + + Input(X) should be in shape of [N*T, C, H, W], while N is the batch + size, T is the temporal segment number specified by :attr:`seg_num`, + C is the channel number, H and W is the height and width of features. + + Temporal Shifting is calculated as follows: + + Step 1: Reshape Input(X) to [N, T, C, H, W]. + + Step 2: Pad 0 to reshaping result in the 2nd(T) dimension with + padding width as 1 on each side, padding result will be in shape + of [N, T+2, C, H, W]. + + Step 3: Assume :attr:`shift_ratio` is :math:`1/4`, slice padding + result as follows: + + $$ + slice1 = x[:, :T, :C/4, :, :] + $$ + $$ + slice2 = x[:, 2:T+2, C/4:C/2, :, :] + $$ + $$ + slice3 = x[:, 1:T+1, C/2:, :, :] + $$ + + Step 4: Concatenate three slices along the 3rd(C) dimension and + reshape result to [N*T, C, H, W]. + + For details of temporal shifting, please refer to paper: + `Temporal Shift Module `_ . + + )DOC"); + } +}; + +class TemporalShiftOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null"); + auto dim_x = ctx->GetInputDim("X"); + if (ctx->HasOutput(framework::GradVarName("X"))) { + ctx->SetOutputDim(framework::GradVarName("X"), dim_x); + } + } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.GetPlace()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(temporal_shift, ops::TemporalShiftOp, + ops::TemporalShiftOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(temporal_shift_grad, ops::TemporalShiftOpGrad); +REGISTER_OP_CPU_KERNEL(temporal_shift, ops::TemporalShiftKernel, + ops::TemporalShiftKernel); +REGISTER_OP_CPU_KERNEL(temporal_shift_grad, ops::TemporalShiftGradKernel, + ops::TemporalShiftGradKernel); diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..24f1f8e178eb51aa7230d6c8c8f69d5beb728940 --- /dev/null +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -0,0 +1,168 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/temporal_shift_op.h" +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +template +__global__ void KeTemporalShiftFw(const T* input, T* output, const int ntchw, + const int tchw, const int chw, const int hw, + const int w, const int t, const int c, + const float shift_ratio) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + int src_it = 0; + for (; tid < ntchw; tid += stride) { + int in = tid / tchw; + int it = (tid % tchw) / chw; + int ic = (tid % chw) / hw; + int ih = (tid % hw) / w; + int iw = tid % w; + + const int c1 = static_cast(c * shift_ratio); + const int c2 = static_cast(c * 2 * shift_ratio); + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it < 0 || src_it >= t) { + output[tid] = 0; + } else { + int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w); + output[tid] = input[src_idx]; + } + } +} + +template +__global__ void KeTemporalShiftBw(const T* output_grad, T* input_grad, + const int ntchw, const int tchw, + const int chw, const int hw, const int w, + const int t, const int c, + const float shift_ratio) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + int src_it = 0; + for (; tid < ntchw; tid += stride) { + int in = tid / tchw; + int it = (tid % tchw) / chw; + int ic = (tid % chw) / hw; + int ih = (tid % hw) / w; + int iw = tid % w; + + const int c1 = static_cast(c * shift_ratio); + const int c2 = static_cast(c * 2 * shift_ratio); + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it >= 0 && src_it < t) { + int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w); + input_grad[src_idx] = output_grad[tid]; + } + } +} + +template +class TemporalShiftOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "This kernel only runs on GPU device."); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + int t = ctx.Attr("seg_num"); + float shift_ratio = ctx.Attr("shift_ratio"); + + const int nt = input->dims()[0]; + const int c = input->dims()[1]; + const int h = input->dims()[2]; + const int w = input->dims()[3]; + + const int hw = h * w; + const int chw = c * hw; + const int tchw = t * chw; + const int ntchw = nt * chw; + + const T* input_data = input->data(); + T* output_data = output->mutable_data({nt, c, h, w}, ctx.GetPlace()); + + int pixelNum = nt * chw; + int grid_dim = (pixelNum + 512 - 1) / 512; + grid_dim = grid_dim > 8 ? 8 : grid_dim; + + KeTemporalShiftFw< + T><<>>( + input_data, output_data, ntchw, tchw, chw, hw, w, t, c, shift_ratio); + } +}; + +template +class TemporalShiftGradOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* output_grad = ctx.Input(framework::GradVarName("Out")); + int t = ctx.Attr("seg_num"); + float shift_ratio = ctx.Attr("shift_ratio"); + + const int nt = output_grad->dims()[0]; + const int c = output_grad->dims()[1]; + const int h = output_grad->dims()[2]; + const int w = output_grad->dims()[3]; + + const int hw = h * w; + const int chw = c * hw; + const int tchw = t * chw; + const int ntchw = nt * chw; + + const T* output_grad_data = output_grad->data(); + T* input_grad_data = + input_grad->mutable_data({nt, c, h, w}, ctx.GetPlace()); + math::SetConstant()( + ctx.template device_context(), input_grad, + static_cast(0)); + + int pixelNum = nt * chw; + int grid_dim = (pixelNum + 512 - 1) / 512; + grid_dim = grid_dim > 8 ? 8 : grid_dim; + + KeTemporalShiftBw< + T><<>>( + output_grad_data, input_grad_data, ntchw, tchw, chw, hw, w, t, c, + shift_ratio); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(temporal_shift, ops::TemporalShiftOpCUDAKernel, + ops::TemporalShiftOpCUDAKernel); +REGISTER_OP_CUDA_KERNEL(temporal_shift_grad, + ops::TemporalShiftGradOpCUDAKernel, + ops::TemporalShiftGradOpCUDAKernel); diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h new file mode 100644 index 0000000000000000000000000000000000000000..4c7eed5af471a18768eda6597472c0ad592ccbd0 --- /dev/null +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static HOSTDEVICE inline int GetEntryIndex(int in, int it, int ic, int ih, + int iw, const int tchw, + const int chw, const int hw, + const int w) { + return in * tchw + it * chw + ic * hw + ih * w + iw; +} + +template +class TemporalShiftKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + int t = ctx.Attr("seg_num"); + float shift_ratio = ctx.Attr("shift_ratio"); + + const int nt = input->dims()[0]; + const int c = input->dims()[1]; + const int h = input->dims()[2]; + const int w = input->dims()[3]; + + const int c1 = static_cast(c * shift_ratio); + const int c2 = static_cast(c * 2 * shift_ratio); + + const int hw = h * w; + const int chw = c * hw; + const int tchw = t * chw; + + const T* input_data = input->data(); + T* output_data = output->mutable_data({nt, c, h, w}, ctx.GetPlace()); + + int src_it = 0; + for (int i = 0; i < output->numel(); i++) { + int in = i / tchw; + int it = (i % tchw) / chw; + int ic = (i % chw) / hw; + int ih = (i % hw) / w; + int iw = i % w; + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it < 0 || src_it >= t) { + output_data[i] = 0; + } else { + int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w); + output_data[i] = input_data[src_idx]; + } + } + } +}; + +template +class TemporalShiftGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* output_grad = ctx.Input(framework::GradVarName("Out")); + int t = ctx.Attr("seg_num"); + float shift_ratio = ctx.Attr("shift_ratio"); + + const int nt = output_grad->dims()[0]; + const int c = output_grad->dims()[1]; + const int h = output_grad->dims()[2]; + const int w = output_grad->dims()[3]; + + const int c1 = static_cast(c * shift_ratio); + const int c2 = static_cast(c * 2 * shift_ratio); + + const int hw = h * w; + const int chw = c * hw; + const int tchw = t * chw; + + const T* output_grad_data = output_grad->data(); + T* input_grad_data = + input_grad->mutable_data({nt, c, h, w}, ctx.GetPlace()); + memset(input_grad_data, 0, input_grad->numel() * sizeof(T)); + + int src_it = 0; + for (int i = 0; i < output_grad->numel(); i++) { + int in = i / tchw; + int it = (i % tchw) / chw; + int ic = (i % chw) / hw; + int ih = (i % hw) / w; + int iw = i % w; + + if (ic < c1) { + src_it = it - 1; + } else if (ic < c2) { + src_it = it + 1; + } else { + src_it = it; + } + + if (src_it >= 0 && src_it < t) { + int src_idx = GetEntryIndex(in, src_it, ic, ih, iw, tchw, chw, hw, w); + input_grad_data[src_idx] = output_grad_data[i]; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index c36673312489738ad0475a0b70a23a1c6c948b9d..7f470924b337d59943c04ab0ff2820555f961732 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -52,6 +52,7 @@ class TensorRTEngineOp : public framework::OperatorBase { std::string engine_key_; std::string engine_serialized_data_; bool calibration_mode_; + int device_id_; public: TensorRTEngineOp(const std::string &type, @@ -62,6 +63,7 @@ class TensorRTEngineOp : public framework::OperatorBase { input_names_ = Inputs("Xs"); max_batch_size_ = Attr("max_batch_size"); workspace_size_ = Attr("workspace_size"); + device_id_ = Attr("gpu_id"); enable_int8_ = Attr("enable_int8"); calibration_data_ = Attr("calibration_data"); engine_key_ = Attr("engine_key"); @@ -79,6 +81,17 @@ class TensorRTEngineOp : public framework::OperatorBase { if (enable_int8_ && calibration_data_.size()) { calibrator_.reset(new TRTInt8Calibrator(calibration_data_)); } + + if (!calibration_mode_ && !engine_serialized_data_.empty()) { + trt_engine_.reset(new inference::tensorrt::TensorRTEngine( + max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(), + device_id_)); + PADDLE_ENFORCE(engine_serialized_data_.size(), + "TRT serialized data should not be empty here," + "there must be error when generate serialized data in TRT " + "subgraph detect pass."); + trt_engine_->Deserialize(engine_serialized_data_); + } } protected: @@ -225,12 +238,8 @@ class TensorRTEngineOp : public framework::OperatorBase { if (!trt_engine_) { trt_engine_.reset(new inference::tensorrt::TensorRTEngine( max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(), - boost::get(dev_place).device)); - if (!engine_serialized_data_.empty()) { - trt_engine_->Deserialize(engine_serialized_data_); - } else { - PrepareTRTEngine(scope, trt_engine_.get()); - } + device_id_)); + PrepareTRTEngine(scope, trt_engine_.get()); } return trt_engine_.get(); } diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc index e7ad2f4fe0c654d8928f5793c1ad8052ab766fb5..cc4d8d6e6f7e24dcb04ed0f58e63cb13ce176bdb 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc @@ -108,6 +108,8 @@ TEST(TensorRTEngineOp, manual) { std::vector({"z0"})); engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString())); engine_op_desc.SetAttr("engine_serialized_data", std::string("")); + int device_id = 0; + engine_op_desc.SetAttr("gpu_id", device_id); LOG(INFO) << "create engine op"; auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc); @@ -204,6 +206,8 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) { std::vector({"z3"})); engine_op_desc.SetAttr("subgraph", std::string(block_->SerializeAsString())); engine_op_desc.SetAttr("engine_serialized_data", std::string("")); + int device_id = 0; + engine_op_desc.SetAttr("gpu_id", device_id); auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc); diff --git a/paddle/fluid/operators/warpctc_cudnn_op.cu.cc b/paddle/fluid/operators/warpctc_cudnn_op.cu.cc index a764d59410c90535dbda0b3f11e89ae9bf578c04..2a744f66f1cef8090ae433270be5e5fede0eaa38 100644 --- a/paddle/fluid/operators/warpctc_cudnn_op.cu.cc +++ b/paddle/fluid/operators/warpctc_cudnn_op.cu.cc @@ -67,9 +67,11 @@ class CudnnCTCKernel : public framework::OpKernel { softmax_logits.mutable_data(logits->dims(), ctx.GetPlace()); softmax_logits.set_lod(logits_lod); int rank = logits->dims().size(); + int axis_dim = logits->dims()[rank - 1]; Tensor in_2d = framework::ReshapeToMatrix(*logits, rank - 1); Tensor out_2d = framework::ReshapeToMatrix(softmax_logits, rank - 1); - math::SoftmaxFunctor()(dev_ctx, &in_2d, &out_2d); + math::SoftmaxFunctor()(dev_ctx, axis_dim, &in_2d, + &out_2d); // ctc needs sequences data stored in transposed padding format // logits and grad using padding data of layout 'TNC' diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index c3db59563f3ae77acd860216b34d2cfb4f8b6560..f889e2e9658eecb4c1931390122fc8b7915bc303 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -44,9 +44,12 @@ add_subdirectory(dynload) cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce) cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper) +set(dgc_deps "") IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) - set(dgc_deps dgc) + if(NOT WIN32) + set(dgc_deps dgc) + endif() ELSE() set(dgc_deps) ENDIF() diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index e5135683112b56d48fc7f380c85595df3b83ec6d..dea9faf5a751f6a5afe3a2ed235ac0a815f102aa 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -34,7 +34,7 @@ from . import io from . import evaluator from . import initializer from . import layers -from . import imperative +from . import dygraph from . import contrib from . import nets from . import optimizer @@ -71,7 +71,7 @@ __all__ = framework.__all__ + executor.__all__ + \ 'initializer', 'layers', 'contrib', - 'imperative', + 'dygraph', 'transpiler', 'nets', 'optimizer', @@ -180,7 +180,7 @@ def __bootstrap__(): 'cudnn_exhaustive_search', 'memory_optimize_debug', 'selected_gpus', 'sync_nccl_allreduce', 'limit_of_tmp_allocation', 'times_excess_than_required_tmp_allocation', - 'enable_inplace_whitelist' + 'enable_inplace_whitelist', 'cudnn_batchnorm_spatial_persistent' ] core.init_gflags([sys.argv[0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py index 870c57e54011361caae5265201d19f58830a87bc..7442059ba07b2ed1d7164b9be60b8bbc92fec651 100644 --- a/python/paddle/fluid/contrib/__init__.py +++ b/python/paddle/fluid/contrib/__init__.py @@ -30,6 +30,8 @@ from . import slim from .slim import * from . import utils from .utils import * +from . import extend_optimizer +from .extend_optimizer import * __all__ = [] __all__ += decoder.__all__ @@ -40,3 +42,4 @@ __all__ += int8_inference.__all__ __all__ += reader.__all__ __all__ += slim.__all__ __all__ += utils.__all__ +__all__ += extend_optimizer.__all__ diff --git a/python/paddle/fluid/contrib/extend_optimizer/__init__.py b/python/paddle/fluid/contrib/extend_optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..697ea0f05ae725cbda66e2568cf212bd69cb8787 --- /dev/null +++ b/python/paddle/fluid/contrib/extend_optimizer/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from . import extend_optimizer_with_weight_decay +from .extend_optimizer_with_weight_decay import * + +__all__ = [] +__all__ += extend_optimizer_with_weight_decay.__all__ diff --git a/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py new file mode 100644 index 0000000000000000000000000000000000000000..fcc99c07346eaa8adc58b0dc7ceca37a1fb72872 --- /dev/null +++ b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py @@ -0,0 +1,152 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddle.fluid +from paddle.fluid import framework as framework + +__all__ = ["extend_with_decoupled_weight_decay"] + + +class DecoupledWeightDecay(object): + def __init__(self, coeff=0.0, apply_decay_param_fun=None, **kwargs): + if not isinstance(coeff, float) and \ + not isinstance(coeff, framework.Variable): + raise TypeError("coeff should be float or Variable.") + self._params_name = set() + self._apply_decay_param_fun = apply_decay_param_fun + self._coeff = coeff + super(DecoupledWeightDecay, self).__init__(**kwargs) + + def _scale_parameters(self, params_and_grads): + """ + Adds weight decay ops. + scaled_parameter = parameter * coeff + + Args: + params_and_grads: A list of (parameters, gradients) pairs, + the parameters need to decay. + Raises: + Exception: The type of coeff and parameter is not consistent. + """ + if isinstance(self._coeff, float) and self._coeff == 0.0: + return + + scaled_params = [] + for param, grad in params_and_grads: + # If no gradient then we don't need to do anything + if grad is None: + continue + if self._apply_decay_param_fun is not None \ + and not self._apply_decay_param_fun(param.name): + continue + + if isinstance(self._coeff, float): + assert param.dtype is not paddle.fluid.core.VarDesc.VarType.FP32, \ + "the type of coeff(float) and parameter(%s) is not consistent."%(self._coeff.dtype) + else: + assert self._coeff.dtype == param.dtype, \ + "the type of coeff(%s) and parameter(%s) is not consistent."%(self._coeff.dtype, param.dtype) + + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + assert param.name not in self._params_name + scaled_params.append((param, grad, param * self._coeff)) + self._params_name.add(param.name) + return scaled_params + + def backward(self, **kargs): + return super(DecoupledWeightDecay, self).backward(**kargs) + + def apply_optimize(self, **kargs): + return super(DecoupledWeightDecay, self).apply_optimize(**kargs) + + def minimize(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None): + params_grads = self.backward( + loss=loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) + scaled_params = self._scale_parameters(params_grads) + for p_grad_sgrad in scaled_params: + param, grad, scaled_param = p_grad_sgrad + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + updated_param = paddle.fluid.layers.elementwise_sub( + x=param, y=scaled_param) + paddle.fluid.layers.assign(input=updated_param, output=param) + + optimize_ops = self.apply_optimize( + loss=loss, + params_grads=params_grads, + startup_program=startup_program) + return optimize_ops, params_grads + + def __str__(self): + return " ".join(["Weight Decay, params:", ",".join(self._params_name)]) + + +def extend_with_decoupled_weight_decay(base_optimizer): + """ + extend_with_decoupled_weight_decay is a decorator function, it returns an + optimizer class with decoupled weight decay. The returned optimizer will + apply weight decay on the optimized parameters with the parameters before + optimization, i.e: new_parameter = optimized_parameter - parameter * coeff. + The details of decoupled weight decay yplease refer to this + `DECOUPLED WEIGHT DECAY REGULARIZATION `_. + + Args: + base_optimizer (Optimizer): The base_optimizer should be a derived class of Optimizer. + + Returns: + OptimizerWithDecoupledWeightDecay: the optimizer with decouple weight decay. + + Examples: + + .. code-block:: python + + AdamW = fluid.contrib.extend_with_decoupled_weight_decay( + fluid.optimizer.Adam) + optimizer = AdamW(learning_rate=0.1, + weight_decay=0.01) + + optimizer.minimize(cost) + """ + if not issubclass(base_optimizer, paddle.fluid.optimizer.Optimizer): + raise TypeError( + "The input(base_optimizer) should be a derived class of Optimizer.") + + class OptimizerWithDecoupledWeightDecay(DecoupledWeightDecay, + base_optimizer): + """ + OptimizerWithDecoupledWeightDecay is used to update the optimized parameters + with the parameters before optimization. For more information, please refer: + https://arxiv.org/pdf/1711.05101.pdf. + + Args: + weight_decay (float|Variable): The weight decay coefficient, it can be + float or Variable. + apply_decay_param_fun (function|None): If it is not None, + only variables that makes apply_decay_param_fun(variable)==True + will be updated. It only works when we want to specify variables. + Default: None. + """ + + def __init__(self, weight_decay, apply_decay_param_fun=None, **kwargs): + super(OptimizerWithDecoupledWeightDecay, self).__init__( + weight_decay, apply_decay_param_fun, **kwargs) + + return OptimizerWithDecoupledWeightDecay diff --git a/python/paddle/fluid/contrib/model_stat.py b/python/paddle/fluid/contrib/model_stat.py new file mode 100644 index 0000000000000000000000000000000000000000..0d974c8d9685840c79de17f297fcba00b01a6c35 --- /dev/null +++ b/python/paddle/fluid/contrib/model_stat.py @@ -0,0 +1,194 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +''' +Example: + >>from paddle.fluid.contrib.model_stat import summary + >>main_program = ... + >>summary(main_program) + +-----+------------+----------------+----------------+---------+------------+ + | No. | TYPE | INPUT | OUTPUT | PARAMs | FLOPs | + +-----+------------+----------------+----------------+---------+------------+ + | 0 | conv2d | (3, 200, 200) | (64, 100, 100) | 9408 | 188160000 | + | 1 | batch_norm | (64, 100, 100) | (64, 100, 100) | 256 | 640000 | + | 2 | relu | (64, 100, 100) | (64, 100, 100) | 0 | 640000 | + | 3 | pool2d | (64, 100, 100) | (64, 50, 50) | 0 | 1440000 | + ... + | 176 | conv2d | (512, 7, 7) | (512, 7, 7) | 2359296 | 231211008 | + | 177 | relu | (512, 7, 7) | (512, 7, 7) | 0 | 25088 | + | 178 | conv2d | (512, 7, 7) | (2048, 7, 7) | 1048576 | 102760448 | + | 179 | relu | (2048, 7, 7) | (2048, 7, 7) | 0 | 100352 | + | 180 | pool2d | (2048, 7, 7) | (2048, 1, 1) | 0 | 100352 | + +-----+------------+----------------+----------------+---------+------------+ + Total PARAMs: 48017344(0.0480G) + Total FLOPs: 11692747751(11.69G) +''' +from collections import OrderedDict +from prettytable import PrettyTable + + +def summary(main_prog): + ''' + It can summary model's PARAMS, FLOPs until now. + It support common operator like conv, fc, pool, relu, sigmoid, bn etc. + Args: + main_prog: main program + Returns: + print summary on terminal + ''' + collected_ops_list = [] + for one_b in main_prog.blocks: + block_vars = one_b.vars + for one_op in one_b.ops: + op_info = OrderedDict() + spf_res = _summary_model(block_vars, one_op) + if spf_res is None: + continue + # TODO: get the operator name + op_info['type'] = one_op.type + op_info['input_shape'] = spf_res[0][1:] + op_info['out_shape'] = spf_res[1][1:] + op_info['PARAMs'] = spf_res[2] + op_info['FLOPs'] = spf_res[3] + collected_ops_list.append(op_info) + + summary_table, total = _format_summary(collected_ops_list) + _print_summary(summary_table, total) + + +def _summary_model(block_vars, one_op): + ''' + Compute operator's params and flops. + Args: + block_vars: all vars of one block + one_op: one operator to count + Returns: + in_data_shape: one operator's input data shape + out_data_shape: one operator's output data shape + params: one operator's PARAMs + flops: : one operator's FLOPs + ''' + if one_op.type in ['conv2d', 'depthwise_conv2d']: + k_arg_shape = block_vars[one_op.input("Filter")[0]].shape + in_data_shape = block_vars[one_op.input("Input")[0]].shape + out_data_shape = block_vars[one_op.output("Output")[0]].shape + c_out, c_in, k_h, k_w = k_arg_shape + _, c_out_, h_out, w_out = out_data_shape + assert c_out == c_out_, 'shape error!' + k_groups = one_op.attr("groups") + kernel_ops = k_h * k_w * (c_in / k_groups) + bias_ops = 0 if one_op.input("Bias") == [] else 1 + params = c_out * (kernel_ops + bias_ops) + flops = h_out * w_out * c_out * (kernel_ops + bias_ops) + # base nvidia paper, include mul and add + flops = 2 * flops + + elif one_op.type == 'pool2d': + in_data_shape = block_vars[one_op.input("X")[0]].shape + out_data_shape = block_vars[one_op.output("Out")[0]].shape + _, c_out, h_out, w_out = out_data_shape + k_size = one_op.attr("ksize") + params = 0 + flops = h_out * w_out * c_out * (k_size[0] * k_size[1]) + + elif one_op.type == 'mul': + k_arg_shape = block_vars[one_op.input("Y")[0]].shape + in_data_shape = block_vars[one_op.input("X")[0]].shape + out_data_shape = block_vars[one_op.output("Out")[0]].shape + # TODO: fc has mul ops + # add attr to mul op, tell us whether it belongs to 'fc' + # this's not the best way + if 'fc' not in one_op.output("Out")[0]: + return None + k_in, k_out = k_arg_shape + # bias in sum op + params = k_in * k_out + 1 + flops = k_in * k_out + + elif one_op.type in ['sigmoid', 'tanh', 'relu', 'leaky_relu', 'prelu']: + in_data_shape = block_vars[one_op.input("X")[0]].shape + out_data_shape = block_vars[one_op.output("Out")[0]].shape + params = 0 + if one_op.type == 'prelu': + params = 1 + flops = 1 + for one_dim in in_data_shape: + flops *= one_dim + + elif one_op.type == 'batch_norm': + in_data_shape = block_vars[one_op.input("X")[0]].shape + out_data_shape = block_vars[one_op.output("Y")[0]].shape + _, c_in, h_out, w_out = in_data_shape + # gamma, beta + params = c_in * 2 + # compute mean and std + flops = h_out * w_out * c_in * 2 + + else: + return None + + return in_data_shape, out_data_shape, params, flops + + +def _format_summary(collected_ops_list): + ''' + Format summary report. + Args: + collected_ops_list: the collected operator with summary + Returns: + summary_table: summary report format + total: sum param and flops + ''' + summary_table = PrettyTable( + ["No.", "TYPE", "INPUT", "OUTPUT", "PARAMs", "FLOPs"]) + summary_table.align = 'r' + + total = {} + total_params = [] + total_flops = [] + for i, one_op in enumerate(collected_ops_list): + # notice the order + table_row = [ + i, + one_op['type'], + one_op['input_shape'], + one_op['out_shape'], + int(one_op['PARAMs']), + int(one_op['FLOPs']), + ] + summary_table.add_row(table_row) + total_params.append(int(one_op['PARAMs'])) + total_flops.append(int(one_op['FLOPs'])) + + total['params'] = total_params + total['flops'] = total_flops + + return summary_table, total + + +def _print_summary(summary_table, total): + ''' + Print all the summary on terminal. + Args: + summary_table: summary report format + total: sum param and flops + ''' + parmas = total['params'] + flops = total['flops'] + print(summary_table) + print('Total PARAMs: {}({:.4f}M)'.format( + sum(parmas), sum(parmas) / (10**6))) + print('Total FLOPs: {}({:.2f}G)'.format(sum(flops), sum(flops) / 10**9)) + print( + "Notice: \n now supported ops include [Conv, DepthwiseConv, FC(mul), BatchNorm, Pool, Activation(sigmoid, tanh, relu, leaky_relu, prelu)]" + ) diff --git a/python/paddle/fluid/contrib/slim/distillation/distiller.py b/python/paddle/fluid/contrib/slim/distillation/distiller.py index 13bb35a8be73ed29e907308d08a33cdc13dee069..3dccfa7e98d4dd5cfb724d8a8f35b8cfdbe6e468 100644 --- a/python/paddle/fluid/contrib/slim/distillation/distiller.py +++ b/python/paddle/fluid/contrib/slim/distillation/distiller.py @@ -19,7 +19,7 @@ from .... import Program from .... import program_guard from .... import regularizer -__all__ = ['FSPDistiller', 'L2Distiller'] +__all__ = ['FSPDistiller', 'L2Distiller', 'SoftLabelDistiller'] class L2Distiller(object): @@ -186,3 +186,91 @@ class FSPDistillerPass(object): def _fsp_matrix(self, fea_map_0, fea_map_1): return layers.fsp_matrix(fea_map_0, fea_map_1) + + +class SoftLabelDistiller(object): + """ + Combine two layers from student net and teacher net by softmax_with_cross_entropy loss. + And add the loss into the total loss using for distillation training. + """ + + def __init__(self, + student_feature_map=None, + teacher_feature_map=None, + student_temperature=1.0, + teacher_temperature=1.0, + distillation_loss_weight=1): + """ + Args: + student_feature_map(str): The name of feature map from student network. + teacher_feature_map(str): The name of feature map from teacher network. + It's shape should be the same with student network. + student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy. default: 1.0 + teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy. default: 1.0 + distillation_loss_weight(float): The weight of the l2-loss. + """ + + self.student_feature_map = student_feature_map + self.teacher_feature_map = teacher_feature_map + self.distillation_loss_weight = distillation_loss_weight + self.student_temperature = student_temperature + self.teacher_temperature = teacher_temperature + + def distiller_loss(self, graph): + """ + Modify graph inplace to add softmax_with_cross_entropy loss. + Args: + graph(GraphWrapper): The graph to be modified. + Returns: + GraphWrapper: The modified graph. + """ + distiller_pass = SoftLabelDistillerPass( + self.student_feature_map, self.teacher_feature_map, + self.student_temperature, self.teacher_temperature, + self.distillation_loss_weight) + dis_graph = distiller_pass.apply(graph) + return dis_graph + + +class SoftLabelDistillerPass(object): + def __init__(self, + student_feature_map, + teacher_feature_map, + student_temperature, + teacher_temperature, + distillation_loss_weight=1): + """ + Args: + student_feature_map(str): The name of feature map from student network. + teacher_feature_map(str): The name of feature map from teacher network. + It's shape should be the same with student network. + student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy. + teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy. + distillation_loss_weight(float): The weight of the l2-loss. + """ + self.student_feature_map = student_feature_map + self.teacher_feature_map = teacher_feature_map + self.student_temperature = student_temperature + self.teacher_temperature = teacher_temperature + self.distillation_loss_weight = distillation_loss_weight + + def apply(self, graph): + ret_graph = graph + with program_guard(ret_graph.program): + + student_feature_map = ret_graph.var(self.student_feature_map)._var + teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var + s_fea = student_feature_map / self.student_temperature + t_fea = teacher_feature_map / self.distillation_loss_weight + t_fea.stop_gradient = True + ce_loss = layers.softmax_with_cross_entropy( + s_fea, t_fea, soft_label=True) + distillation_loss = ce_loss * self.distillation_loss_weight + student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var + loss = distillation_loss + student_loss + + ret_graph.out_nodes[ + 'soft_label_loss_' + self.student_feature_map + "_" + + self.teacher_feature_map] = distillation_loss.name + ret_graph.out_nodes['loss'] = loss.name + return ret_graph diff --git a/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py b/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py index 7388ecd3b096fc05d1420b904f2d65d805c3fc53..e7f5f0d6a2185521549abe7af7b6be2b0b7d90fb 100644 --- a/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py +++ b/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py @@ -204,6 +204,10 @@ class GraphWrapper(object): """ super(GraphWrapper, self).__init__() self.program = Program() if program is None else program + self.persistables = {} + for var in self.program.list_vars(): + if var.persistable: + self.persistables[var.name] = var self.compiled_graph = None self.in_nodes = OrderedDict(in_nodes) self.out_nodes = OrderedDict(out_nodes) @@ -467,7 +471,12 @@ class GraphWrapper(object): path(str): The path to save the persistables. exe(framework.Executor): The executor used to save the persistables. """ - io.save_persistables(exe.exe, path, main_program=self.program) + # update persistables from program + for var in self.program.list_vars(): + if var.persistable and var.name not in self.persistables: + self.persistables[var.name] = var + + io.save_vars(exe.exe, path, vars=self.persistables.values()) def load_persistables(self, path, exe): """ @@ -481,7 +490,7 @@ class GraphWrapper(object): return os.path.exists(os.path.join(path, var.name)) io.load_vars( - exe.exe, path, main_program=self.program, predicate=if_exist) + exe.exe, path, vars=self.persistables.values(), predicate=if_exist) def update_param_shape(self, scope): """ diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py b/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py index aa50891121f5454cf6bd43264a9ee86e8e0717ed..a22b6da020510838dc82fe7af87ab62db6e874ef 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py @@ -20,7 +20,7 @@ from .... import io from .... import core from ....compiler import CompiledProgram from ....compiler import BuildStrategy -from ....framework import IrGraph +from ....framework import IrGraph, Variable, Program from ..core.strategy import Strategy from .quantization_pass import * @@ -88,41 +88,76 @@ class QuantizationStrategy(Strategy): self.save_out_nodes = save_out_nodes self.save_in_nodes = save_in_nodes + def on_compression_begin(self, context): + """ + Restore graph when the compressoin task is inited from checkpoint. + """ + # It is inited from checkpoint and has missed start epoch. + if context.epoch_id != 0 and context.epoch_id > self.start_epoch: + _logger.info("Restore quantization task from checkpoint") + self._modify_graph_for_quantization(context) + _logger.info("Finish restoring quantization task from checkpoint") + + def _modify_graph_for_quantization(self, context): + """ + Insert fake_quantize_op and fake_dequantize_op before trainging and testing. + """ + train_ir_graph = IrGraph( + core.Graph(context.optimize_graph.program.clone().desc), + for_test=False) + test_ir_graph = IrGraph( + core.Graph(context.eval_graph.program.clone().desc), for_test=True) + transform_pass = QuantizationTransformPass( + scope=context.scope, + place=context.place, + weight_bits=self.weight_bits, + activation_bits=self.activation_bits, + activation_quantize_type=self.activation_quantize_type, + weight_quantize_type=self.weight_quantize_type) + transform_pass.apply(train_ir_graph) + transform_pass.apply(test_ir_graph) + # Put persistables created by transform_pass into context.optimize_graph.persistables + # for saving checkpoint. + program_persistables = set() + for var in context.optimize_graph.program.list_vars(): + if var.persistable: + program_persistables.add(var.name) + + program = Program() + for var_node in train_ir_graph.all_persistable_nodes(): + if var_node.name() not in program_persistables: + var_desc = var_node.var() + var = program.global_block().create_var( + name=var_node.name(), + shape=var_desc.shape(), + dtype=var_desc.dtype(), + type=var_desc.type(), + lod_level=var_desc.lod_level()) + context.optimize_graph.persistables[var.name] = var + + build_strategy = BuildStrategy() + build_strategy.enable_inplace = False + build_strategy.memory_optimize = False + # for quantization training + context.optimize_graph.compiled_graph = CompiledProgram( + train_ir_graph.graph).with_data_parallel( + loss_name=context.optimize_graph.out_nodes['loss'], + build_strategy=build_strategy) + # for evaluation. And program compiled from ir graph must be with data parallel. + context.eval_graph.compiled_graph = CompiledProgram( + test_ir_graph.graph).with_data_parallel( + build_strategy=build_strategy) + # for saving inference model after training + context.put('quantization_test_ir_graph_backup', test_ir_graph) + def on_epoch_begin(self, context): """ Insert fake_quantize_op and fake_dequantize_op before trainging and testing. """ - super(QuantizationStrategy, self).on_compression_begin(context) + super(QuantizationStrategy, self).on_epoch_begin(context) if self.start_epoch == context.epoch_id: _logger.info('QuantizationStrategy::on_epoch_begin') - train_ir_graph = IrGraph( - core.Graph(context.optimize_graph.program.desc), for_test=False) - test_ir_graph = IrGraph( - core.Graph(context.eval_graph.program.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=context.scope, - place=context.place, - weight_bits=self.weight_bits, - activation_bits=self.activation_bits, - activation_quantize_type=self.activation_quantize_type, - weight_quantize_type=self.weight_quantize_type) - transform_pass.apply(train_ir_graph) - transform_pass.apply(test_ir_graph) - - build_strategy = BuildStrategy() - build_strategy.enable_inplace = False - build_strategy.memory_optimize = False - # for quantization training - context.optimize_graph.compiled_graph = CompiledProgram( - train_ir_graph.graph).with_data_parallel( - loss_name=context.optimize_graph.out_nodes['loss'], - build_strategy=build_strategy) - # for evaluation. And program compiled from ir graph must be with data parallel. - context.eval_graph.compiled_graph = CompiledProgram( - test_ir_graph.graph).with_data_parallel( - build_strategy=build_strategy) - # for saving inference model after training - context.put('quantization_test_ir_graph_backup', test_ir_graph) + self._modify_graph_for_quantization(context) _logger.info('Finish QuantizationStrategy::on_epoch_begin') def on_epoch_end(self, context): diff --git a/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml b/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml index ef89dfb7801e6df8a2cf842a5fcc745d70254977..07ccb7a21db566835aed3b56284ea1d72ad6e222 100644 --- a/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml +++ b/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml @@ -33,10 +33,17 @@ distillers: teacher_feature_map: 'teacher.tmp_2' student_feature_map: 'student.tmp_2' distillation_loss_weight: 1 + soft_label_distiller: + class: 'SoftLabelDistiller' + student_temperature: 1.0 + teacher_temperature: 1.0 + teacher_feature_map: 'teacher.tmp_1' + student_feature_map: 'student.tmp_1' + distillation_loss_weight: 0.001 strategies: distillation_strategy: class: 'DistillationStrategy' - distillers: ['fsp_distiller', 'l2_distiller'] + distillers: ['fsp_distiller', 'l2_distiller', 'soft_label_distiller'] start_epoch: 0 end_epoch: 1 compressor: diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py new file mode 100644 index 0000000000000000000000000000000000000000..2b331308de5ee9a8aa52a9e303bfbcf8d4264d5f --- /dev/null +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -0,0 +1,151 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from functools import partial +import numpy as np +import paddle +import paddle.fluid as fluid +import contextlib + + +def get_places(): + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + return places + + +@contextlib.contextmanager +def prog_scope_guard(main_prog, startup_prog): + scope = fluid.core.Scope() + with fluid.unique_name.guard(): + with fluid.scope_guard(scope): + with fluid.program_guard(main_prog, startup_prog): + yield + + +def bow_net(data, + label, + dict_dim, + is_sparse=False, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2): + """ + BOW net + This model is from https://github.com/PaddlePaddle/models: + fluid/PaddleNLP/text_classification/nets.py + """ + emb = fluid.layers.embedding( + input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') + bow_tanh = fluid.layers.tanh(bow) + fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") + fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh") + prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + + return avg_cost + + +class TestWeightDecay(unittest.TestCase): + def setUp(self): + self.word_dict = paddle.dataset.imdb.word_dict() + reader = paddle.batch( + paddle.dataset.imdb.train(self.word_dict), batch_size=2)() + self.train_data = [next(reader) for _ in range(5)] + self.learning_rate = .5 + + def run_program(self, place, feed_list): + exe = fluid.Executor(place) + feeder = fluid.DataFeeder(feed_list=feed_list, place=place) + exe.run(fluid.default_startup_program()) + + main_prog = fluid.default_main_program() + param_list = [var.name for var in main_prog.block(0).all_parameters()] + + param_sum = [] + for data in self.train_data: + out = exe.run(main_prog, + feed=feeder.feed(data), + fetch_list=param_list) + p_sum = 0 + for v in out: + p_sum += np.sum(np.abs(v)) + param_sum.append(p_sum) + return param_sum + + def check_weight_decay(self, place, model): + main_prog = fluid.framework.Program() + startup_prog = fluid.framework.Program() + startup_prog.random_seed = 1 + with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + avg_cost = model(data, label, len(self.word_dict)) + AdamW = fluid.contrib.extend_with_decoupled_weight_decay( + fluid.optimizer.Adam) + + optimizer = AdamW( + learning_rate=self.learning_rate, + weight_decay=self.learning_rate) + + optimizer.minimize(avg_cost) + param_sum = self.run_program(place, [data, label]) + + return param_sum + + def check_weight_decay2(self, place, model): + main_prog = fluid.framework.Program() + startup_prog = fluid.framework.Program() + startup_prog.random_seed = 1 + with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + + avg_cost = model(data, label, len(self.word_dict)) + + param_list = [(var, var * self.learning_rate) + for var in main_prog.block(0).all_parameters()] + + optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate) + + optimizer.minimize(avg_cost) + for params in param_list: + updated_p = fluid.layers.elementwise_sub( + x=params[0], y=params[1]) + fluid.layers.assign(input=updated_p, output=params[0]) + + param_sum = self.run_program(place, [data, label]) + return param_sum + + def test_weight_decay(self): + for place in get_places(): + model = partial(bow_net, is_sparse=False) + param_sum1 = self.check_weight_decay(place, model) + param_sum2 = self.check_weight_decay2(place, model) + + for i in range(len(param_sum1)): + assert np.isclose(a=param_sum1[i], b=param_sum2[i], rtol=5e-5) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/imperative/__init__.py b/python/paddle/fluid/dygraph/__init__.py similarity index 89% rename from python/paddle/fluid/imperative/__init__.py rename to python/paddle/fluid/dygraph/__init__.py index 7281b3ea4b961a14126023a14a2ba2f03c7d1387..2d0c7b7ddaacee28da599d5850e9b3381c01de5c 100644 --- a/python/paddle/fluid/imperative/__init__.py +++ b/python/paddle/fluid/dygraph/__init__.py @@ -32,6 +32,9 @@ from .profiler import * from . import checkpoint from .checkpoint import * +from . import learning_rate_scheduler +from .learning_rate_scheduler import * + __all__ = [] __all__ += layers.__all__ __all__ += base.__all__ @@ -39,3 +42,4 @@ __all__ += nn.__all__ __all__ += tracer.__all__ __all__ += profiler.__all__ __all__ += checkpoint.__all__ +__all__ += learning_rate_scheduler.__all__ diff --git a/python/paddle/fluid/imperative/base.py b/python/paddle/fluid/dygraph/base.py similarity index 88% rename from python/paddle/fluid/imperative/base.py rename to python/paddle/fluid/dygraph/base.py index 097cd2be35b01aced30486b874f202381c4d9962..d55dbbb9c72cb887e169849c3a3e32a13c202a7b 100644 --- a/python/paddle/fluid/imperative/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable'] def enabled(): - return framework._in_imperative_mode() + return framework._in_dygraph_mode() @signature_safe_contextmanager @@ -39,14 +39,14 @@ def guard(place=None): with framework.program_guard(train, startup): with framework.unique_name.guard(): - with framework._imperative_guard(tracer): - with framework._imperative_place_guard(place): + with framework._dygraph_guard(tracer): + with framework._dygraph_place_guard(place): yield def to_variable(value, block=None, name=None): if isinstance(value, np.ndarray): - assert enabled(), "to_variable could only be called in imperative mode" + assert enabled(), "to_variable could only be called in dygraph mode" if not block: block = framework.default_main_program().current_block() diff --git a/python/paddle/fluid/imperative/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py similarity index 93% rename from python/paddle/fluid/imperative/checkpoint.py rename to python/paddle/fluid/dygraph/checkpoint.py index 37c43f29d2ae9214058238e4f834dbbcd9e42df1..f992ae0576c81ed98a3e9f7a446b0c2e808622ea 100644 --- a/python/paddle/fluid/imperative/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -68,7 +68,7 @@ def save_persistables(vardict, dirname, filename=None): dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, init_cell) param_path = "./my_paddle_model" - fluid.imperative.checkpoint.save_persistables(ptb_model.state_dict(), dirname=param_path, + fluid.dygraph.save_persistables(ptb_model.state_dict(), dirname=param_path, layer=ptb_model) """ if isinstance(vardict, collections.OrderedDict): @@ -97,17 +97,17 @@ def load_persistables(vardict, dirname, filename=None): Examples: .. code-block:: python - my_layer = layer(fluid.imperative.Layer) + my_layer = layer(fluid.dygraph.Layer) param_path = "./my_paddle_model" - param_dict = fluid.imperative.checkpoint.load_persistables(my_layer.parameters(), param_path) + param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path) param_1 = param_dict['PtbModel_0.w_1'] or: - my_layer = layer(fluid.imperative.Layer) + my_layer = layer(fluid.dygraph.Layer) param_path = "./my_paddle_model" filename = "model.file" - param_dict = fluid.imperative.checkpoint.load_persistables(my_layer.state_dict(), param_path, + param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path, filename=filename) param_1 = param_dict['PtbModel_0.w_1'] diff --git a/python/paddle/fluid/imperative/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py similarity index 99% rename from python/paddle/fluid/imperative/layer_object_helper.py rename to python/paddle/fluid/dygraph/layer_object_helper.py index 3d4426e8cdfe79a6fa2d6452e7cb3ab0a458c0bc..c56652e103ce93bf5459b30b66c7b1f04e7c14d0 100644 --- a/python/paddle/fluid/imperative/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -16,7 +16,7 @@ from __future__ import print_function import copy import six -from ..framework import Parameter, _in_imperative_mode +from ..framework import Parameter, _in_dygraph_mode from ..param_attr import ParamAttr from .. import core from six.moves import zip diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/dygraph/layers.py similarity index 99% rename from python/paddle/fluid/imperative/layers.py rename to python/paddle/fluid/dygraph/layers.py index e64667f7f467d0d0a3c07d14ce22c3f231e82eb6..014ee41f4c5aa280fb5b366d8f1704290cc067d4 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -283,7 +283,7 @@ class PyLayer(core.PyLayer): @classmethod def __call__(cls, *inputs): - tracer = framework._imperative_tracer() + tracer = framework._dygraph_tracer() block = framework.default_main_program().current_block() ivar_inputs = [x._ivar for x in inputs] diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..3209fa76d95c35c6c5a1bb36801b9f9354b1a927 --- /dev/null +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -0,0 +1,224 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import math + +from .. import unique_name + +__all__ = [ + 'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', 'ExponentialDecay', + 'InverseTimeDecay', 'PolynomialDecay', 'CosineDecay' +] + + +class LearningRateDecay(object): + """ + Base class of learning rate decay + """ + + def __init__(self, begin=0, step=1, dtype='float32'): + self.step_num = begin + self.step_size = step + self.dtype = dtype + + def __call__(self): + lr = self.step() + if isinstance(lr, float): + lr = self.create_lr_var(lr) + self.step_num += self.step_size + return lr + + def create_lr_var(self, lr): + from .. import layers + lr = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(lr), + dtype=self.dtype, + persistable=True) + return lr + + def step(self): + raise NotImplementedError() + + +class PiecewiseDecay(LearningRateDecay): + def __init__(self, boundaries, values, begin, step=1, dtype='float32'): + super(PiecewiseDecay, self).__init__(begin, step, dtype) + self.boundaries = boundaries + self.values = values + + self.vars = [] + for value in values: + self.vars.append(self.create_lr_var(value)) + + def step(self): + for i in range(len(self.boundaries)): + if self.step_num < self.boundaries[i]: + return self.vars[i] + return self.vars[len(self.values) - 1] + + +class NaturalExpDecay(LearningRateDecay): + def __init__(self, + learning_rate, + decay_steps, + decay_rate, + staircase=False, + begin=0, + step=1, + dtype='float32'): + super(NaturalExpDecay, self).__init__(begin, step, dtype) + self.learning_rate = learning_rate + self.decay_steps = decay_steps + self.decay_rate = decay_rate + self.staircase = staircase + + def step(self): + from .. import layers + div_res = self.create_lr_var(self.step_num / self.decay_steps) + if self.staircase: + div_res = layers.floor(div_res) + decayed_lr = self.learning_rate * layers.exp(-1 * self.decay_rate * + div_res) + + return decayed_lr + + +class ExponentialDecay(LearningRateDecay): + def __init__(self, + learning_rate, + decay_steps, + decay_rate, + staircase=False, + begin=0, + step=1, + dtype='float32'): + super(ExponentialDecay, self).__init__(begin, step, dtype) + self.learning_rate = learning_rate + self.decay_steps = decay_steps + self.decay_rate = decay_rate + self.staircase = staircase + + def step(self): + from .. import layers + div_res = self.create_lr_var(self.step_num / self.decay_steps) + if self.staircase: + div_res = layers.floor(div_res) + + decayed_lr = self.learning_rate * (self.decay_rate**div_res) + + return decayed_lr + + +class InverseTimeDecay(LearningRateDecay): + def __init__(self, + learning_rate, + decay_steps, + decay_rate, + staircase=False, + begin=0, + step=1, + dtype='float32'): + super(InverseTimeDecay, self).__init__(begin, step, dtype) + self.learning_rate = learning_rate + self.decay_steps = decay_steps + self.decay_rate = decay_rate + self.staircase = staircase + + def step(self): + from .. import layers + div_res = self.create_lr_var(self.step_num / self.decay_steps) + if self.staircase: + div_res = layers.floor(div_res) + + decayed_lr = self.learning_rate / (1 + self.decay_rate * div_res) + + return decayed_lr + + +class PolynomialDecay(LearningRateDecay): + def __init__(self, + learning_rate, + decay_steps, + end_learning_rate=0.0001, + power=1.0, + cycle=False, + begin=0, + step=1, + dtype='float32'): + super(PolynomialDecay, self).__init__(begin, step, dtype) + self.learning_rate = learning_rate + self.decay_steps = decay_steps + self.end_learning_rate = end_learning_rate + self.power = power + self.cycle = cycle + + def step(self): + from .. import layers + tmp_step_num = self.step_num + tmp_decay_steps = self.decay_steps + if self.cycle: + div_res = layers.ceil( + self.create_lr_var(tmp_step_num / float(self.decay_steps))) + + if tmp_step_num == 0: + div_res = self.create_lr_var(1.0) + tmp_decay_steps = self.decay_steps * div_res + else: + tmp_step_num = self.create_lr_var(tmp_step_num + if tmp_step_num < self.decay_steps + else self.decay_steps) + + decayed_lr = (self.learning_rate - self.end_learning_rate) * \ + ((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate + return decayed_lr + + +class CosineDecay(LearningRateDecay): + def __init__(self, + learning_rate, + step_each_epoch, + epochs, + begin=0, + step=1, + dtype='float32'): + super(CosineDecay, self).__init__(begin, step, dtype) + self.learning_rate = learning_rate + self.step_each_epoch = step_each_epoch + self.epochs = epochs + + def step(self): + from .. import layers + cur_epoch = layers.floor( + self.create_lr_var(self.step_num / self.step_each_epoch)) + decayed_lr = self.learning_rate * 0.5 * ( + layers.cos(cur_epoch * math.pi / self.epochs) + 1) + return decayed_lr + + +class NoamDecay(LearningRateDecay): + def __init__(self, d_model, warmup_steps, begin=1, step=1, dtype='float32'): + super(NoamDecay, self).__init__(begin, step, dtype) + self.d_model = d_model + self.warmup_steps = warmup_steps + + def step(self): + from .. import layers + a = self.create_lr_var(self.step_num**-0.5) + b = self.create_lr_var((self.warmup_steps**-1.5) * self.step_num) + lr_value = (self.d_model**-0.5) * layers.elementwise_min(a, b) + return lr_value diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/dygraph/nn.py similarity index 99% rename from python/paddle/fluid/imperative/nn.py rename to python/paddle/fluid/dygraph/nn.py index 9856276b20b7affb548847d359463451bb238518..8925381119272d7462562c0952d3e157f78f25af 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -133,7 +133,7 @@ class Conv2D(layers.Layer): outputs={'Out': [pre_act]}, attrs={'axis': 1}) - # Currently, we don't support inplace in imperative mode + # Currently, we don't support inplace in dygraph mode return self._helper.append_activation(pre_act, act=self._act) @@ -265,7 +265,7 @@ class FC(layers.Layer): attrs={'axis': self._num_flatten_dims}) else: pre_activation = pre_bias - # Currently, we don't support inplace in imperative mode + # Currently, we don't support inplace in dygraph mode return self._helper.append_activation(pre_activation, act=self._act) @@ -387,7 +387,7 @@ class BatchNorm(layers.Layer): "use_global_stats": self._use_global_stats }) - # Currently, we don't support inplace in imperative mode + # Currently, we don't support inplace in dygraph mode return self._helper.append_activation(batch_norm_out, self._act) @@ -426,7 +426,7 @@ class Embedding(layers.Layer): dict_size = len(dataset.ids) input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') - embedding = fluid.imperative.Embedding(size=[dict_size, 16]) + embedding = fluid.dygraph.Embedding(size=[dict_size, 16]) fc = embedding(input) """ diff --git a/python/paddle/fluid/imperative/profiler.py b/python/paddle/fluid/dygraph/profiler.py similarity index 100% rename from python/paddle/fluid/imperative/profiler.py rename to python/paddle/fluid/dygraph/profiler.py diff --git a/python/paddle/fluid/imperative/tracer.py b/python/paddle/fluid/dygraph/tracer.py similarity index 95% rename from python/paddle/fluid/imperative/tracer.py rename to python/paddle/fluid/dygraph/tracer.py index 28c8586813410f7349da7943a966eaa9cc3816d2..94e212b139b2b375aa9f5252d396e90235ba33c1 100644 --- a/python/paddle/fluid/imperative/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -24,12 +24,12 @@ __all__ = ['Tracer'] def release_op(op): - del framework._imperative_tracer()._ops[op._trace_id] + del framework._dygraph_tracer()._ops[op._trace_id] class Tracer(core.Tracer): """ - Python wrapper of imperative tracer + Python wrapper of dygraph tracer """ def __init__(self, block): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index be8008cabd08de0cf957d93ca71ac38fc592af79..2c2881dedf2bf6d201c88149d47273b5df35bfee 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -75,20 +75,20 @@ GRAD_VAR_SUFFIX = core.kGradVarSuffix() ZERO_VAR_SUFFIX = core.kZeroVarSuffix() CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName() -_imperative_tracer_ = None -_imperative_current_expected_place_ = None +_dygraph_tracer_ = None +_dygraph_current_expected_place_ = None -def _in_imperative_mode(): - return _imperative_tracer_ is not None +def _in_dygraph_mode(): + return _dygraph_tracer_ is not None -def _imperative_tracer(): - return _imperative_tracer_ +def _dygraph_tracer(): + return _dygraph_tracer_ def _current_expected_place(): - return _imperative_current_expected_place_ + return _dygraph_current_expected_place_ def _cpu_num(): @@ -396,7 +396,7 @@ class Variable(object): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - if _in_imperative_mode(): + if _in_dygraph_mode(): # record vars in tracer rather than blocks self._ivar = kwargs.get("ivar", None) if not self._ivar: @@ -406,7 +406,7 @@ class Variable(object): _current_expected_place(), stop_gradient, True if persistable else False) if persistable: - _imperative_tracer().trace_var(name, self) + _dygraph_tracer().trace_var(name, self) else: self.error_clip = error_clip @@ -515,8 +515,8 @@ class Variable(object): Returns: str: The debug string. """ - if _in_imperative_mode(): - # TODO(panyx0718): add more imperative debug info. + if _in_dygraph_mode(): + # TODO(panyx0718): add more dygraph debug info. return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype, self.shape) @@ -548,42 +548,42 @@ class Variable(object): @property def _stop_gradient(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.stop_gradient else: return self.stop_gradient @_stop_gradient.setter def _stop_gradient(self, s): - if _in_imperative_mode(): + if _in_dygraph_mode(): self._ivar.stop_gradient = s else: self.stop_gradient = s @property def persistable(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.persistable else: return self.desc.persistable() @persistable.setter def persistable(self, p): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.persistable else: self.desc.set_persistable(p) @property def name(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.name else: return cpt.to_text(self.desc.name()) @name.setter def name(self, new_name): - if _in_imperative_mode(): + if _in_dygraph_mode(): self._ivar.name = new_name else: self.desc.set_name(new_name) @@ -591,26 +591,26 @@ class Variable(object): @property def shape(self): # convert to tuple, make it as same as numpy API. - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.shape else: return tuple(self.desc.shape()) @property def dtype(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.dtype else: return self.desc.dtype() @property def lod_level(self): - # TODO(minqiyang): Support lod_level in imperative mode + # TODO(minqiyang): Support lod_level in dygraph mode return self.desc.lod_level() @property def type(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self._ivar.dtype else: return self.desc.type() @@ -789,13 +789,24 @@ class Variable(object): if isinstance(item, tuple): if len(item) > len(self.shape): raise IndexError("Too many indexes") + fixedSize = True + for i in range(len(self.shape)): + if self.shape[i] == -1: + fixedSize = False + break + newitem = self._reconstructSliceinfo(item) or item - check, info = self._detectContinuesSlice(newitem) - if check: - starts = info[0] - ends = info[1] - axes = [i for i in range(len(starts))] - return self._sliceVar(axes, starts, ends) + if fixedSize: + check, info = self._detectContinuesSlice(newitem) + if check: + starts = info[0] + ends = info[1] + axes = [i for i in range(len(starts))] + return self._sliceVar(axes, starts, ends) + else: + new_var = self + for index, o in enumerate(newitem): + new_var = new_var._sliceAndConcatVar(o, index) else: new_var = self for index, o in enumerate(newitem): @@ -918,7 +929,7 @@ class Operator(object): inputs=None, outputs=None, attrs=None): - if _in_imperative_mode(): + if _in_dygraph_mode(): if type is None: raise ValueError( "`type` to initialized an Operator can not be None.") @@ -1037,7 +1048,7 @@ class Operator(object): for arg in out_args: out_arg_names.append(cpt.to_text(arg.name)) # TODO(minqiyang): could we remove variable's op in static mode? - if not _in_imperative_mode(): + if not _in_dygraph_mode(): arg.op = self self.desc.set_output(out_proto.name, out_arg_names) @@ -1083,7 +1094,7 @@ class Operator(object): @property def type(self): - if _in_imperative_mode(): + if _in_dygraph_mode(): return self.iop.type else: return self.desc.type() @@ -1626,7 +1637,7 @@ class Block(object): Returns: Operator: the append Operator. """ - if _in_imperative_mode(): + if _in_dygraph_mode(): op = Operator( block=self, desc=None, @@ -1638,9 +1649,8 @@ class Block(object): # record ops in tracer rather than blocks # # TODO(minqiyang): add op stop_gradient support in static mode too. - # currently, we only support stop_gradient in imperative mode. - _imperative_tracer().trace_op(op, - kwargs.get("stop_gradient", False)) + # currently, we only support stop_gradient in dygraph mode. + _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False)) else: op_desc = self.desc.append_op() op = Operator( @@ -1699,7 +1709,7 @@ class Block(object): return self.ops[start:end] def _prepend_op(self, *args, **kwargs): - if _in_imperative_mode(): + if _in_dygraph_mode(): op = Operator( self, None, @@ -1707,8 +1717,7 @@ class Block(object): inputs=kwargs.get("inputs", None), outputs=kwargs.get("outputs", None), attrs=kwargs.get("attrs", None)) - _imperative_tracer().trace_op(op, - kwargs.get("stop_gradient", False)) + _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False)) else: op_desc = self.desc._prepend_op() op = Operator( @@ -3511,22 +3520,22 @@ def _get_var(name, program=None): @signature_safe_contextmanager -def _imperative_guard(tracer): - global _imperative_tracer_ - tmp_trace = _imperative_tracer_ - _imperative_tracer_ = tracer +def _dygraph_guard(tracer): + global _dygraph_tracer_ + tmp_trace = _dygraph_tracer_ + _dygraph_tracer_ = tracer yield - _imperative_tracer_ = tmp_trace + _dygraph_tracer_ = tmp_trace @signature_safe_contextmanager -def _imperative_place_guard(place): - global _imperative_current_expected_place_ - tmp_place = _imperative_current_expected_place_ - _imperative_current_expected_place_ = place +def _dygraph_place_guard(place): + global _dygraph_current_expected_place_ + tmp_place = _dygraph_current_expected_place_ + _dygraph_current_expected_place_ = place yield - _imperative_current_expected_place_ = tmp_place + _dygraph_current_expected_place_ = tmp_place diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 8358bb1aba98d8f5699cbda27e657ba6c470d333..6aff93dceaf5cfd299bdc9f68246ed579f248f3c 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -165,7 +165,7 @@ class ConstantInitializer(Initializer): 'force_cpu': self._force_cpu or force_init_on_cpu() }, stop_gradient=True) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -245,7 +245,7 @@ class UniformInitializer(Initializer): attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -324,7 +324,7 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -509,7 +509,7 @@ class XavierInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -610,7 +610,7 @@ class MSRAInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -709,7 +709,7 @@ class BilinearInitializer(Initializer): 'shape': list(shape), value_name: values }) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op @@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer): value_name: values }, stop_gradient=True) - if not framework._in_imperative_mode(): + if not framework._in_dygraph_mode(): var.op = op return op diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 3569a8bc357daf9408e8ae3eb53ad9d2942cfeaa..3cdd05533f703ac27333daab7ada0c26392a24f5 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -17,7 +17,7 @@ from .param_attr import ParamAttr from .initializer import Constant from . import layers from . import backward -from .imperative import Layer, nn +from .dygraph import Layer, nn from . import executor from . import core diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index a85ef3c13f845959200d26391f6c95923a11c6ed..7eb912645e5077d35a2d11d7d09a033d28345e15 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -17,7 +17,7 @@ from __future__ import print_function import copy import six -from .framework import Parameter, dtype_is_floating, _in_imperative_mode +from .framework import Parameter, dtype_is_floating, _in_dygraph_mode from . import unique_name from paddle.fluid.initializer import Constant, Xavier from .param_attr import ParamAttr @@ -30,9 +30,9 @@ class LayerHelper(LayerHelperBase): def __init__(self, layer_type, **kwargs): self.kwargs = kwargs name = self.kwargs.get('name', None) - # TODO(panyx0718, minqiyang): imperative mode + # TODO(panyx0718, minqiyang): dygraph mode # can not use both `layer_type` and `name`. Deprecate LayerHelper - # and write a Helper for imperative mode. + # and write a Helper for dygraph mode. if name is None: self.kwargs['name'] = unique_name.generate(layer_type) diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index a68160d797bcaca8cff849c560960d6a8823de53..869a5f54e9cdf5740c5e216917d92880d7d61e2d 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -17,7 +17,7 @@ from __future__ import print_function import copy import numpy as np -from .framework import Variable, default_main_program, default_startup_program, _in_imperative_mode, _current_expected_place +from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place from . import unique_name from .param_attr import ParamAttr, WeightNormParamAttr from . import core @@ -54,8 +54,8 @@ class LayerHelperBase(object): Return Variable construct from value """ if isinstance(value, np.ndarray): - assert _in_imperative_mode( - ), "to_variable could only be called in imperative mode" + assert _in_dygraph_mode( + ), "to_variable could only be called in dygraph mode" if not block: block = default_main_program().current_block() @@ -302,8 +302,8 @@ class LayerHelperBase(object): param = self._create_weight_normalize(attr, shape, dtype) WeightNormParamAttr.params_with_weight_norm.append(param) return param - if _in_imperative_mode(): - # In imperative mode, we want the returned parameter to be + if _in_dygraph_mode(): + # In dygraph mode, we want the returned parameter to be # initialized so that it can be used imperatively. return self.main_program.global_block().create_parameter( dtype=dtype, @@ -370,7 +370,7 @@ class LayerHelperBase(object): initializer: initializer to use """ assert isinstance(var, Variable) - if _in_imperative_mode(): + if _in_dygraph_mode(): initializer(var, var.block) else: self.startup_program.global_block().create_var( diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 3277766171d2d812f5fb0fd81556d7f979f0702f..a5e513ed5e35d530dd07c49339995461da8454a1 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -929,9 +929,9 @@ def array_read(array, i): Examples: .. code-block:: python - tmp = fluid.layers.zeros(shape=[10], dtype='int32') + array = fluid.layers.create_array(dtype='float32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) - arr = layers.array_read(tmp, i=i) + item = fluid.layers.array_read(array, i) """ helper = LayerHelper('array_read', **locals()) if not isinstance( diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 378aeb37605f1971da3fe4a926e4b36b8eae2ca4..b7d1eeba80d93d549a019455087bb7cc1d2a1083 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -22,18 +22,21 @@ strategy according to this module. from __future__ import print_function +import math + from . import control_flow from . import nn from . import ops from . import tensor from ..initializer import init_on_cpu from ..framework import default_main_program, Parameter, unique_name, name_scope -import math +from ..dygraph import base as imperative_base +from ..dygraph import learning_rate_scheduler as imperate_lr __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS', - 'cosine_decay' + 'cosine_decay', 'linear_lr_warmup' ] @@ -66,13 +69,17 @@ def noam_decay(d_model, warmup_steps): The decayed learning rate. """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter(1) + if imperative_base.enabled(): + decay = imperate_lr.NoamDecay(d_model, warmup_steps) + return decay + else: + global_step = _decay_step_counter(1) - a = global_step**-0.5 - b = (warmup_steps**-1.5) * global_step - lr_value = (d_model**-0.5) * nn.elementwise_min(a, b) + a = global_step**-0.5 + b = (warmup_steps**-1.5) * global_step + lr_value = (d_model**-0.5) * nn.elementwise_min(a, b) - return lr_value + return lr_value def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): @@ -112,14 +119,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() + if imperative_base.enabled(): + decay = imperate_lr.ExponentialDecay(learning_rate, decay_steps, + decay_rate, staircase) + return decay + else: + global_step = _decay_step_counter() - div_res = global_step / decay_steps - if staircase: - div_res = ops.floor(div_res) - decayed_lr = learning_rate * (decay_rate**div_res) + div_res = global_step / decay_steps + if staircase: + div_res = ops.floor(div_res) + decayed_lr = learning_rate * (decay_rate**div_res) - return decayed_lr + return decayed_lr def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): @@ -141,14 +153,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): The decayed learning rate """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() + if imperative_base.enabled(): + decay = imperate_lr.NaturalExpDecay(learning_rate, decay_steps, + decay_rate, staircase) + return decay + else: + global_step = _decay_step_counter() - div_res = global_step / decay_steps - if staircase: - div_res = ops.floor(div_res) - decayed_lr = learning_rate * ops.exp(-1 * decay_rate * div_res) + div_res = global_step / decay_steps + if staircase: + div_res = ops.floor(div_res) + decayed_lr = learning_rate * ops.exp(-1 * decay_rate * div_res) - return decayed_lr + return decayed_lr def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): @@ -187,15 +204,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): sgd_optimizer.minimize(avg_cost) """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() + if imperative_base.enabled(): + decay = imperate_lr.InverseTimeDecay(learning_rate, decay_steps, + decay_rate, staircase) + return decay + else: + global_step = _decay_step_counter() - div_res = global_step / decay_steps - if staircase: - div_res = ops.floor(div_res) + div_res = global_step / decay_steps + if staircase: + div_res = ops.floor(div_res) - decayed_lr = learning_rate / (1 + decay_rate * div_res) + decayed_lr = learning_rate / (1 + decay_rate * div_res) - return decayed_lr + return decayed_lr def polynomial_decay(learning_rate, @@ -227,27 +249,33 @@ def polynomial_decay(learning_rate, Variable: The decayed learning rate """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() - - if cycle: - div_res = ops.ceil(global_step / decay_steps) - zero_var = tensor.fill_constant( - shape=[1], dtype='float32', value=0.0) - one_var = tensor.fill_constant( - shape=[1], dtype='float32', value=1.0) - - with control_flow.Switch() as switch: - with switch.case(global_step == zero_var): - tensor.assign(input=one_var, output=div_res) - decay_steps = decay_steps * div_res + if imperative_base.enabled(): + decay = imperate_lr.PolynomialDecay(learning_rate, decay_steps, + end_learning_rate, power, cycle) + return decay else: - decay_steps_var = tensor.fill_constant( - shape=[1], dtype='float32', value=float(decay_steps)) - global_step = nn.elementwise_min(x=global_step, y=decay_steps_var) + global_step = _decay_step_counter() + + if cycle: + div_res = ops.ceil(global_step / decay_steps) + zero_var = tensor.fill_constant( + shape=[1], dtype='float32', value=0.0) + one_var = tensor.fill_constant( + shape=[1], dtype='float32', value=1.0) + + with control_flow.Switch() as switch: + with switch.case(global_step == zero_var): + tensor.assign(input=one_var, output=div_res) + decay_steps = decay_steps * div_res + else: + decay_steps_var = tensor.fill_constant( + shape=[1], dtype='float32', value=float(decay_steps)) + global_step = nn.elementwise_min( + x=global_step, y=decay_steps_var) - decayed_lr = (learning_rate - end_learning_rate) * \ - ((1 - global_step / decay_steps) ** power) + end_learning_rate - return decayed_lr + decayed_lr = (learning_rate - end_learning_rate) * \ + ((1 - global_step / decay_steps) ** power) + end_learning_rate + return decayed_lr def piecewise_decay(boundaries, values): @@ -279,34 +307,38 @@ def piecewise_decay(boundaries, values): if len(values) - len(boundaries) != 1: raise ValueError("len(values) - len(boundaries) should be 1") - global_step = _decay_step_counter() + if imperative_base.enabled(): + decay = imperate_lr.PiecewiseDecay(boundaries, values, 0) + return decay + else: + global_step = _decay_step_counter() - lr = tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") + lr = tensor.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") - with control_flow.Switch() as switch: - for i in range(len(boundaries)): - boundary_val = tensor.fill_constant( + with control_flow.Switch() as switch: + for i in range(len(boundaries)): + boundary_val = tensor.fill_constant( + shape=[1], + dtype='float32', + value=float(boundaries[i]), + force_cpu=True) + value_var = tensor.fill_constant( + shape=[1], dtype='float32', value=float(values[i])) + with switch.case(global_step < boundary_val): + tensor.assign(value_var, lr) + last_value_var = tensor.fill_constant( shape=[1], dtype='float32', - value=float(boundaries[i]), - force_cpu=True) - value_var = tensor.fill_constant( - shape=[1], dtype='float32', value=float(values[i])) - with switch.case(global_step < boundary_val): - tensor.assign(value_var, lr) - last_value_var = tensor.fill_constant( - shape=[1], - dtype='float32', - value=float(values[len(values) - 1])) - with switch.default(): - tensor.assign(last_value_var, lr) + value=float(values[len(values) - 1])) + with switch.default(): + tensor.assign(last_value_var, lr) - return lr + return lr def cosine_decay(learning_rate, step_each_epoch, epochs): @@ -336,12 +368,17 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): learning_rate = base_lr, step_each_epoch=10000, epochs=120) """ with default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() + if imperative_base.enabled(): + decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch, + epochs) + return decay + else: + global_step = _decay_step_counter() - cur_epoch = ops.floor(global_step / step_each_epoch) - decayed_lr = learning_rate * 0.5 * ( - ops.cos(cur_epoch * math.pi / epochs) + 1) - return decayed_lr + cur_epoch = ops.floor(global_step / step_each_epoch) + decayed_lr = learning_rate * 0.5 * ( + ops.cos(cur_epoch * math.pi / epochs) + 1) + return decayed_lr def append_LARS(params_grads, learning_rate, weight_decay): @@ -363,6 +400,9 @@ def append_LARS(params_grads, learning_rate, weight_decay): / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) """ + assert not imperative_base.enabled( + ), "append_LARS is NOT supported in dygraph mode now" + def _balanced_weight(param_norm, grad_norm): if weight_decay == 1.0: return grad_norm + param_norm @@ -383,3 +423,59 @@ def append_LARS(params_grads, learning_rate, weight_decay): / _balanced_weight(param_norm, grad_norm) # set back param local learning rate param.optimize_attr['learning_rate'] = decayed_lr + + +def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): + """ + Applies linear learning rate warmup before the normal learning rate + scheduling. + + .. code-block:: python + + if global_step < warmup_steps: + linear_step = end_lr - start_lr + lr = start_lr + linear_step * (global_step / warmup_steps) + + Args: + learning_rate (float | Variable): A float value or Variable. + warmup_steps (int): The warmup steps. + start_lr (float): The start learning of warmup. + end_lr (float): The end learning of warmup. + + Returns: + The decayed learning rate in warmup period. + + Examples: + .. code-block:: python + + boundaries = [100, 200] + lr_steps = [0.1, 0.01, 0.001] + warmup_steps = 50 + start_lr = 1. / 3. + end_lr = 0.1 + decayed_lr = fluid.layers.linear_lr_warmup( + fluid.layers.piecewise_decay(boundaries, lr_steps), + warmup_steps, start_lr, end_lr) + + """ + assert (isinstance(end_lr, float)) + assert (isinstance(start_lr, float)) + linear_step = end_lr - start_lr + with default_main_program()._lr_schedule_guard(): + lr = tensor.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate_warmup") + + global_step = _decay_step_counter() + + with control_flow.Switch() as switch: + with switch.case(global_step < warmup_steps): + decayed_lr = start_lr + linear_step * (global_step / + float(warmup_steps)) + tensor.assign(decayed_lr, lr) + with switch.default(): + tensor.assign(learning_rate, lr) + return lr diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f2413f603304f8262476ca3ae2b820c89d009c3d..91414fdeb207781afd5e28afa5a3fa6e1018efb1 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -23,8 +23,8 @@ import os import inspect from ..layer_helper import LayerHelper from ..initializer import Normal, Constant, NumpyArrayInitializer -from ..framework import Variable, OpProtoHolder, _in_imperative_mode -from ..imperative import base +from ..framework import Variable, OpProtoHolder, _in_dygraph_mode +from ..dygraph import base from ..param_attr import ParamAttr from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .tensor import concat, assign @@ -32,7 +32,7 @@ from . import utils from .. import unique_name from functools import reduce from .. import core -from ..imperative import layers +from ..dygraph import layers __all__ = [ 'fc', @@ -183,10 +183,12 @@ __all__ = [ 'get_tensor_from_selected_rows', 'lstm', 'shuffle_channel', + 'temporal_shift', 'py_func', 'psroi_pool', 'teacher_student_sigmoid_loss', 'huber_loss', + 'kldiv_loss', 'tree_conv', 'npair_loss', 'fsp_matrix', @@ -296,7 +298,6 @@ def fc(input, data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32") fc = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh") """ - helper = LayerHelper("fc", **locals()) dtype = helper.input_dtype() @@ -1820,17 +1821,18 @@ def sequence_softmax(input, use_cudnn=False, name=None): return softmax_out -def softmax(input, use_cudnn=False, name=None): +def softmax(input, use_cudnn=False, name=None, axis=-1): """ The input of the softmax operator is a tensor of any rank. The output tensor has the same shape as the input. - The input tensor will first be logically flattened to a 2-D matrix. The matrix's - second dimension(row length) is as same as the last dimension of the input + The dimension :attr:`axis` of the input tensor will be permuted to the last. + Then the input tensor will be logically flattened to a 2-D matrix. The matrix's + second dimension(row length) is the same as the dimension :attr:`axis` of the input tensor, and the first dimension(column length) is the product of all other dimensions of the input tensor. For each row of the matrix, the softmax operator squashes the K-dimensional(K is the width of the matrix, which is also the size - of the input tensor's last dimension) vector of arbitrary real values to a + of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional vector of real values in the range [0, 1] that add up to 1. It computes the exponential of the given dimension and the sum of exponential @@ -1852,6 +1854,9 @@ def softmax(input, use_cudnn=False, name=None): False by default. Default: False name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Default: None. + axis (int): The index of dimension to perform softmax calculations, it should + be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of + input variable. Default: -1. Returns: Variable: output of softmax @@ -1861,7 +1866,10 @@ def softmax(input, use_cudnn=False, name=None): .. code-block:: python fc = fluid.layers.fc(input=x, size=10) - softmax = fluid.layers.softmax(input=fc) + # perform softmax in the second dimension + softmax = fluid.layers.softmax(input=fc, axis=1) + # perform softmax in the last dimension + softmax = fluid.layers.softmax(input=fc, axis=-1) """ helper = LayerHelper('softmax', **locals()) @@ -1871,7 +1879,8 @@ def softmax(input, use_cudnn=False, name=None): type="softmax", inputs={"X": input}, outputs={"Out": softmax_out}, - attrs={"use_cudnn": use_cudnn}) + attrs={"axis": axis, + "use_cudnn": use_cudnn}) return softmax_out @@ -3279,6 +3288,8 @@ def layer_norm(input, >>> dtype='float32') >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) """ + assert _in_dygraph_mode( + ) is not True, "please use FC instead of fc in dygraph mode!" helper = LayerHelper('layer_norm', **locals()) dtype = helper.input_dtype() @@ -5866,11 +5877,49 @@ def multiplex(inputs, index): """ ${comment} - >>> import paddle.fluid as fluid - >>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32') - >>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') - >>> index = fluid.layers.data(name='index', shape=[1], dtype='int32') - >>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index) + For Example: + + .. code-block:: text + + case 1: + + Given: + + X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]], + [[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]], + [[2,0,3,4], [2,1,7,8], [2,2,4,2], [2,3,3,4]], + [[3,0,3,4], [3,1,7,8], [3,2,4,2], [3,3,3,4]]] + + index = [3,0,1,2] + + out:[[3 0 3 4] // X[3,0] (3 = index[i], 0 = i); i=0 + [0 1 3 4] // X[0,1] (0 = index[i], 1 = i); i=1 + [1 2 4 2] // X[1,2] (0 = index[i], 2 = i); i=2 + [2 3 3 4]] // X[2,3] (0 = index[i], 3 = i); i=3 + + case 2: + + Given: + + X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]], + [[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]]] + + index = [1,0] + + out:[[1 0 3 4] // X[1,0] (3 = index[0], 0 = i); i=1 + [0 1 3 4] // X[0,1] (0 = index[1], 1 = i); i=2 + [0 2 4 4] // X[0,2] (0 = 0, 2 = i); i=3 + [0 3 3 4]] // X[0,3] (0 = 0, 3 = i); i=4 + + Examples: + + .. code-block:: python + + import paddle.fluid as fluid + x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32') + x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') + index = fluid.layers.data(name='index', shape=[1], dtype='int32') + out = fluid.layers.multiplex(inputs=[x1, x2], index=index) Args: inputs (list): ${x_comment}. @@ -6405,8 +6454,8 @@ def squeeze(input, axes, name=None): x = layers.data(name='x', shape=[5, 1, 10]) y = layers.sequeeze(input=x, axes=[1]) """ - assert not _in_imperative_mode(), ( - "squeeze layer is not supported in imperative mode yet.") + assert not _in_dygraph_mode(), ( + "squeeze layer is not supported in dygraph mode yet.") helper = LayerHelper("squeeze", **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) @@ -9144,7 +9193,7 @@ def _elementwise_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) y = helper.kwargs.get('y', None) - if _in_imperative_mode(): + if _in_dygraph_mode(): x = base.to_variable(x) y = base.to_variable(y) @@ -10352,6 +10401,48 @@ def shuffle_channel(x, group, name=None): return out +@templatedoc() +def temporal_shift(x, seg_num, shift_ratio=0.25, name=None): + """ + **Temporal Shift Operator** + + ${comment} + + Args: + x(Variable): ${x_comment} + seg_num(int): ${seg_num_comment} + shift_ratio(float): ${shift_ratio_comment} + name (str, default None): The name of this layer. + + Returns: + out(Variable): The temporal shifting result is a tensor variable with the + same shape and same type as the input. + + Raises: + TypeError: seg_num must be int type. + + Examples: + .. code-block:: python + + input = fluid.layers.data(name='input', shape=[4,2,2], dtype='float32') + out = fluid.layers.temporal_shift(x=input, seg_num=2, shift_ratio=0.2) + """ + helper = LayerHelper("temporal_shift", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + if not isinstance(seg_num, int): + raise TypeError("seg_num must be int type.") + + helper.append_op( + type="temporal_shift", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"seg_num": seg_num, + "shift_ratio": shift_ratio}) + return out + + class PyFuncRegistry(object): _register_funcs = [] @@ -10672,6 +10763,38 @@ def huber_loss(input, label, delta): return out +@templatedoc() +def kldiv_loss(x, target, reduction='mean', name=None): + """ + ${comment} + + Args: + x (Variable): ${x_comment} + target (Variable): ${target_comment} + reduction (Variable): ${reduction_comment} + name (str, default None): The name of this layer. + + Returns: + kldiv\_loss (Variable): The KL divergence loss. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[4,2,2], dtype='float32') + target = fluid.layers.data(name='target', shape=[4,2,2], dtype='float32') + loss = fluid.layers.kldiv_loss(x=x, target=target, reduction='batchmean') + """ + helper = LayerHelper('kldiv_loss', **locals()) + loss = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='kldiv_loss', + inputs={'X': x, + 'Target': target}, + outputs={'Loss': loss}, + attrs={'reduction': reduction}) + return loss + + @templatedoc() def tree_conv(nodes_vector, edge_set, diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index ef90638c721810e618ce4760e83e1a63b86c2325..80450119f44e93aae4b483983484ea18be5b2035 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -20,7 +20,6 @@ from ..framework import convert_np_dtype_to_dtype_ from ..framework import Variable from ..initializer import Constant, force_init_on_cpu from ..core import VarDesc -from ..imperative import base as imperative_base from .layer_function_generator import templatedoc import numpy diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index e21f303a3e07fe176920cd0650fb96f600dd4743..79accabe87869c832b7467acbaf70d11cbca8a96 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -30,7 +30,8 @@ from .initializer import Constant from .layer_helper import LayerHelper from .layers import ops from .regularizer import append_regularization_ops -from .imperative import base as imperative_base +from .dygraph import base as imperative_base +from .dygraph.learning_rate_scheduler import LearningRateDecay from paddle.fluid import core from paddle.fluid.layers import tensor from functools import reduce @@ -54,9 +55,19 @@ class Optimizer(object): """ def __init__(self, learning_rate, regularization=None, name=None): - if not isinstance(learning_rate, float) and \ - not isinstance(learning_rate, framework.Variable): - raise TypeError("learning rate should be float or Variable") + if framework._in_dygraph_mode(): + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, LearningRateDecay): + raise TypeError( + "learning rate should be float or LearningRateDecay, got %s here" + % type(learning_rate)) + else: + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, framework.Variable): + raise TypeError( + "learning rate should be float or Variable, got %s here" % + type(learning_rate)) + self._name = name self.regularization = regularization self._learning_rate = learning_rate @@ -80,24 +91,49 @@ class Optimizer(object): return self._opti_name_list def _create_global_learning_rate(self): - lr = self._global_learning_rate() + if imperative_base.enabled(): + # create learning rate Variable + if isinstance(self._learning_rate, float): + lr = self._global_learning_rate() - if isinstance(lr, framework.Variable): - return - else: - if not isinstance(self._learning_rate, float): + if isinstance(lr, framework.Variable): + return + else: + self._learning_rate_map[framework.default_main_program( + )] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32' if self._dtype is None else self._dtype, + persistable=True) + # get learning rate Variable from LearningRateDecay + elif isinstance(self._learning_rate, LearningRateDecay): + self._learning_rate_map[framework.default_main_program( + )] = self._learning_rate() + else: raise TypeError( - "learning rate variable is create outside optimizer," - "can not create new learning rate variable for new program") + "optimizer's learning rate must be float or LearningRateDecay" + ) + else: + lr = self._global_learning_rate() - # create learning rate in the current main program - self._learning_rate_map[framework.default_main_program( - )] = layers.create_global_var( - name=unique_name.generate("learning_rate"), - shape=[1], - value=float(self._learning_rate), - dtype='float32' if self._dtype is None else self._dtype, - persistable=True) + if isinstance(lr, framework.Variable): + return + else: + if not isinstance(self._learning_rate, float): + raise TypeError( + "learning rate variable is create outside optimizer," + "can not create new learning rate variable for new program" + ) + + # create learning rate in the current main program + self._learning_rate_map[framework.default_main_program( + )] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32' if self._dtype is None else self._dtype, + persistable=True) def _global_learning_rate(self, program=None): """ @@ -169,7 +205,7 @@ class Optimizer(object): name = self._name + "_" + name if (name in self._accumulators and param.name in self._accumulators[name]): - if framework._in_imperative_mode(): + if framework._in_dygraph_mode(): return self._accumulators[name][param.name] raise Exception("Accumulator {} already exists for parameter {}". format(name, param.name)) @@ -326,12 +362,38 @@ class Optimizer(object): Examples: See examples in `apply_gradients`. """ - if callbacks is None: - callbacks = [error_clip_callback] + self._dtype = loss.dtype + if framework._in_dygraph_mode(): + if parameter_list is not None: + parameters = parameter_list + else: + parameters = framework._dygraph_tracer().all_parameters() + + params_grads = [] + for param in parameters: + if not param.trainable: + continue + if param._ivar._grad_ivar() is not None: + # create gradient variable + grad_var = Variable( + block=loss.block, + name=param._ivar._grad_name(), + stop_gradient=True, + ivar=param._ivar._grad_ivar()) + params_grads.append((param, grad_var)) else: - assert (isinstance(callbacks, list)) - callbacks.append(error_clip_callback) - return append_backward(loss, parameter_list, no_grad_set, callbacks) + if callbacks is None: + callbacks = [error_clip_callback] + else: + assert (isinstance(callbacks, list)) + program = loss.block.program + with program_guard(program, startup_program): + params_grads = append_backward(loss, parameter_list, + no_grad_set, callbacks) + # Note: since we can't use all_reduce_op now, + # dgc_op should be the last op of one grad. + self._append_dgc_ops(params_grads) + return params_grads def apply_gradients(self, params_grads): """ @@ -372,6 +434,30 @@ class Optimizer(object): return optimize_ops + def apply_optimize(self, loss, startup_program, params_grads): + """ + Second part of `minimize`, appending optimization operators for + given `params_grads` pairs. + + Args: + loss (Variable): loss variable to run optimizations. + startup_program (Program): startup_program for initializing parameters + in `parameter_list`. + params_grads (list): list of (param, grad) pair to do optimization. + + Returns: + list: A list of operators appended to the current program. + """ + if framework._in_dygraph_mode(): + with program_guard(framework.default_main_program(), + framework.default_startup_program()): + optimize_ops = self._create_optimization_pass(params_grads) + else: + program = loss.block.program + with program_guard(program, startup_program): + optimize_ops = self.apply_gradients(params_grads) + return optimize_ops + def minimize(self, loss, startup_program=None, @@ -394,38 +480,13 @@ class Optimizer(object): tuple: (optimize_ops, params_grads) which are, list of operators appended; and list of (param, grad) Variables pair for optimization. """ - self._dtype = loss.dtype - optimize_ops = [] - if framework._in_imperative_mode(): - if parameter_list is not None: - parameters = parameter_list - else: - parameters = framework._imperative_tracer().all_parameters() - - params_grads = [] - for param in parameters: - if not param.trainable: - continue - if param._ivar._grad_ivar() is not None: - # create gradient variable - grad_var = Variable( - block=loss.block, - name=param._ivar._grad_name(), - stop_gradient=True, - ivar=param._ivar._grad_ivar()) - params_grads.append((param, grad_var)) - with program_guard(framework.default_main_program(), - framework.default_startup_program()): - optimize_ops = self._create_optimization_pass(params_grads) - else: - program = loss.block.program - with program_guard(program, startup_program): - params_grads = self.backward(loss, startup_program, - parameter_list, no_grad_set) - # Note: since we can't use all_reduce_op now, - # dgc_op should be the last op of one grad. - self._append_dgc_ops(params_grads) - optimize_ops = self.apply_gradients(params_grads) + params_grads = self.backward( + loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) + optimize_ops = self.apply_optimize( + loss, startup_program=startup_program, params_grads=params_grads) return optimize_ops, params_grads @@ -581,10 +642,10 @@ class DGCMomentumOptimizer(MomentumOptimizer): DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication. This optimizer will do two things: - + 1. Compress the gradient by get TopK import value from tensor \ and use it for allreduce to reduce network bandwidth. - + 2. Call momentum to optimize on the cost. Args: diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index d139feac6ffe5a223a6628e95cd47cabc29cdd14..d70154decd999d3a28dfeb9595da4a66bd048815 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -78,7 +78,7 @@ list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) -list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) +list(REMOVE_ITEM TEST_OPS test_imperative_mnist) list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) @@ -89,7 +89,7 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL) py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL) py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS FLAGS_cudnn_deterministic=1) -py_test_modules(test_imperative_optimizer MODULES test_imperative_optimizer ENVS +py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS FLAGS_cudnn_deterministic=1) if(WITH_DISTRIBUTE) py_test_modules(test_dist_train MODULES test_dist_train SERIAL) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index b84ce2b3aeab7963f8de85eb09ff6e085e52c198..6b8622b6f26f6102e5ee02716f30a847ed9a2fed 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -262,14 +262,14 @@ class OpTest(unittest.TestCase): if isinstance(value, tuple): data = value[0] lod = value[1] - v = fluid.imperative.base.to_variable(value=data) + v = fluid.dygraph.base.to_variable(value=data) v._ivar.value().get_tensor().set_recursive_sequence_lengths(lod) return v else: - return fluid.imperative.base.to_variable(value) + return fluid.dygraph.base.to_variable(value) - def _calc_imperative_output(self, place, parallel=False, no_check_set=None): - with fluid.imperative.base.guard(place=place): + def _calc_dygraph_output(self, place, parallel=False, no_check_set=None): + with fluid.dygraph.base.guard(place=place): block = fluid.default_main_program().global_block() # prepare input variable @@ -316,7 +316,7 @@ class OpTest(unittest.TestCase): return outputs - def _calc_output(self, place, parallel=False, no_check_set=None): + def _calc_output(self, place, parallel=False, no_check_set=None, loss=None): program = Program() block = program.global_block() self._append_ops(block) @@ -329,8 +329,14 @@ class OpTest(unittest.TestCase): use_cuda = False if isinstance(place, fluid.CUDAPlace(0)): use_cuda = True - executor = fluid.ParallelExecutor( - use_cuda=use_cuda, loss_name=loss.name, main_program=program) + if loss: + executor = fluid.ParallelExecutor( + use_cuda=use_cuda, + loss_name=loss.name, + main_program=program) + else: + executor = fluid.ParallelExecutor( + use_cuda=use_cuda, main_program=program) else: executor = Executor(place) @@ -364,9 +370,9 @@ class OpTest(unittest.TestCase): atol, no_check_set=None, equal_nan=False, - check_imperative=False): - if check_imperative: - imperative_outs = self._calc_imperative_output( + check_dygraph=False): + if check_dygraph: + dygraph_outs = self._calc_dygraph_output( place, no_check_set=no_check_set) outs, fetch_list = self._calc_output(place, no_check_set=no_check_set) @@ -393,8 +399,8 @@ class OpTest(unittest.TestCase): type(sub_out)) for item in sub_out: sub_out_name, expect = item[0], item[1] - if check_imperative: - imperative_actual = imperative_outs[sub_out_name][0] + if check_dygraph: + imperative_actual = dygraph_outs[sub_out_name][0] imperative_actual_t = np.array( imperative_actual._ivar.value().get_tensor()) idx = find_actual(sub_out_name, fetch_list) @@ -407,7 +413,7 @@ class OpTest(unittest.TestCase): actual_t, expect_t, atol=atol, equal_nan=equal_nan), "Output (" + sub_out_name + ") has diff at " + str(place)) - if check_imperative: + if check_dygraph: self.assertTrue( np.allclose( imperative_actual_t, @@ -415,21 +421,21 @@ class OpTest(unittest.TestCase): atol=atol, equal_nan=equal_nan), "Output (" + sub_out_name + ") has diff at " + - str(place) + " in imperative mode") + str(place) + " in dygraph mode") if isinstance(expect, tuple): self.assertListEqual( actual.recursive_sequence_lengths(), expect[1], "Output (" + sub_out_name + ") has different lod at " + str(place)) - if check_imperative: + if check_dygraph: self.assertListEqual( imperative_actual._ivar.value().get_tensor() .recursive_sequence_lengths(), expect[1], "Output (" + out_name + ") has different lod at " + - str(place) + " in imperative mode") + str(place) + " in dygraph mode") else: - if check_imperative: - imperative_actual = imperative_outs[out_name][0] + if check_dygraph: + imperative_actual = dygraph_outs[out_name][0] imperative_actual_t = np.array( imperative_actual._ivar.value().get_tensor()) idx = find_actual(out_name, fetch_list) @@ -443,7 +449,7 @@ class OpTest(unittest.TestCase): "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " + str(expect_t) + "\n" + "But Got" + str(actual_t) + " in class " + self.__class__.__name__) - if check_imperative: + if check_dygraph: self.assertTrue( np.allclose( imperative_actual_t, @@ -458,12 +464,12 @@ class OpTest(unittest.TestCase): self.assertListEqual(actual.recursive_sequence_lengths(), expect[1], "Output (" + out_name + ") has different lod at " + str(place)) - if check_imperative: + if check_dygraph: self.assertListEqual( imperative_actual._ivar.value().get_tensor() .recursive_sequence_lengths(), expect[1], "Output (" + out_name + ") has different lod at " + - str(place) + " in imperative mode") + str(place) + " in dygraph mode") def _get_places(self): if self.dtype == np.float16: @@ -490,11 +496,11 @@ class OpTest(unittest.TestCase): atol=1e-5, no_check_set=None, equal_nan=False, - check_imperative=False): + check_dygraph=False): places = self._get_places() for place in places: self.check_output_with_place(place, atol, no_check_set, equal_nan, - check_imperative) + check_dygraph) def check_output_customized(self, checker): places = self._get_places() diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index b12aaea3219cb81e8fa0e7584120db510fb7b62c..9cb88d4a8553f3b750f6cf3b24115b4d188ed1d6 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -18,7 +18,7 @@ import numpy as np import paddle.fluid as fluid -class L1(fluid.imperative.Layer): +class L1(fluid.dygraph.Layer): def __init__(self, prefix): super(L1, self).__init__(prefix) self._param_attr = fluid.ParamAttr( @@ -32,7 +32,7 @@ class L1(fluid.imperative.Layer): return self.w1 + self.w2 -class L2(fluid.imperative.Layer): +class L2(fluid.dygraph.Layer): def __init__(self, prefix): super(L2, self).__init__(prefix) self.layer1 = L1(self.full_name()) @@ -42,7 +42,7 @@ class L2(fluid.imperative.Layer): return self.layer1() + self.layer2() -class L3(fluid.imperative.Layer): +class L3(fluid.dygraph.Layer): def __init__(self, prefix): super(L3, self).__init__(prefix) self.layer1 = L2(self.full_name()) @@ -54,7 +54,7 @@ class L3(fluid.imperative.Layer): class TestBaseLayer(unittest.TestCase): def test_one_level(self): - with fluid.imperative.guard(): + with fluid.dygraph.guard(): l = L1('test_one_level') ret = l() self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") @@ -62,7 +62,7 @@ class TestBaseLayer(unittest.TestCase): self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) def test_three_level(self): - with fluid.imperative.guard(): + with fluid.dygraph.guard(): l = L3('test_three_level') names = [p.name for p in l.parameters()] ret = l() diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index 848c9a4952aebcf93fd7bf12f7bc4cd15c7a8b28..c66d59aceb05dfbf9beac809ff13841a77953695 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -156,7 +156,7 @@ class TestGRUOp(OpTest): } def test_check_output(self): - self.check_output(atol=1e-8, check_imperative=True) + self.check_output(atol=1e-8, check_dygraph=True) def test_check_grad(self): self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden']) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 4c44195a3d42a1a2a4a072b0513f212b22269c31..13f2d662178c7e1474ec43fdeadf7046516eb8e5 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -18,11 +18,11 @@ import numpy as np import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.imperative.nn import FC +from paddle.fluid.dygraph.nn import FC from test_imperative_base import new_program_scope -class MyLayer(fluid.imperative.Layer): +class MyLayer(fluid.dygraph.Layer): def __init__(self, name_scope): super(MyLayer, self).__init__(name_scope) @@ -34,7 +34,7 @@ class MyLayer(fluid.imperative.Layer): return [x] -class MyPyLayer(fluid.imperative.PyLayer): +class MyPyLayer(fluid.dygraph.PyLayer): def __init__(self): super(MyPyLayer, self).__init__() @@ -48,7 +48,7 @@ class MyPyLayer(fluid.imperative.PyLayer): return np.array(dout) * (1 - np.square(np.array(out))) -class MLP(fluid.imperative.Layer): +class MLP(fluid.dygraph.Layer): def __init__(self, name_scope): super(MLP, self).__init__(name_scope) self._fc1 = FC(self.full_name(), @@ -71,7 +71,7 @@ class MLP(fluid.imperative.Layer): return x -class SimpleRNNCell(fluid.imperative.Layer): +class SimpleRNNCell(fluid.dygraph.Layer): def __init__(self, name_scope, step_input_size, hidden_size, output_size, param_attr): super(SimpleRNNCell, self).__init__(name_scope) @@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.imperative.Layer): return reduce_out, hidden -class SimpleRNN(fluid.imperative.Layer): +class SimpleRNN(fluid.dygraph.Layer): def __init__(self, name_scope): super(SimpleRNN, self).__init__(name_scope) self.seq_len = 4 @@ -194,10 +194,10 @@ class SimpleRNN(fluid.imperative.Layer): class TestImperative(unittest.TestCase): def test_sum_op(self): x = np.ones([2, 2], np.float32) - with fluid.imperative.guard(): + with fluid.dygraph.guard(): inputs = [] for _ in range(10): - inputs.append(fluid.imperative.base.to_variable(x)) + inputs.append(fluid.dygraph.base.to_variable(x)) ret = fluid.layers.sums(inputs) loss = fluid.layers.reduce_sum(ret) loss._backward() @@ -205,17 +205,17 @@ class TestImperative(unittest.TestCase): self.assertTrue(np.allclose(inputs[0]._gradient(), x)) def test_layer(self): - with fluid.imperative.guard(): + with fluid.dygraph.guard(): cl = core.Layer() cl.forward([]) - l = fluid.imperative.Layer("l") + l = fluid.dygraph.Layer("l") self.assertRaises(NotImplementedError, l.forward, []) def test_pylayer_func_id(self): - with fluid.imperative.guard(): + with fluid.dygraph.guard(): - class PyLayer1(fluid.imperative.PyLayer): + class PyLayer1(fluid.dygraph.PyLayer): def __init__(self): super(PyLayer1, self).__init__() @@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase): def backward(input): return input - class PyLayer2(fluid.imperative.PyLayer): + class PyLayer2(fluid.dygraph.PyLayer): def __init__(self): super(PyLayer2, self).__init__() @@ -241,21 +241,21 @@ class TestImperative(unittest.TestCase): py_layer_1 = PyLayer1() py_layer_2 = PyLayer2() - py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) - py_layer_2(fluid.imperative.base.to_variable(np.ones([2, 2]))) + py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2]))) + py_layer_2(fluid.dygraph.base.to_variable(np.ones([2, 2]))) id = py_layer_1.forward_id self.assertGreater(id, 0) self.assertEqual(py_layer_1.backward_id, id + 1) self.assertEqual(py_layer_2.forward_id, id + 2) self.assertEqual(py_layer_2.backward_id, id + 3) - py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) + py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2]))) self.assertEqual(py_layer_1.forward_id, id) def test_pylayer(self): np_inp = np.ones([2, 2], np.float32) - with fluid.imperative.guard(): + with fluid.dygraph.guard(): my_py_layer = MyPyLayer() - var_inp = fluid.imperative.base.to_variable(np_inp) + var_inp = fluid.dygraph.base.to_variable(np_inp) outs = my_py_layer(var_inp) dy_out = np.sum(outs[0]._numpy()) outs[0]._backward() @@ -282,8 +282,8 @@ class TestImperative(unittest.TestCase): def test_layer_in_out(self): np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) - with fluid.imperative.guard(): - var_inp = fluid.imperative.base.to_variable(np_inp) + with fluid.dygraph.guard(): + var_inp = fluid.dygraph.base.to_variable(np_inp) l = MyLayer("my_layer") x = l(var_inp)[0] self.assertIsNotNone(x) @@ -310,8 +310,8 @@ class TestImperative(unittest.TestCase): def test_mlp(self): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) - with fluid.imperative.guard(): - var_inp = fluid.imperative.base.to_variable(np_inp) + with fluid.dygraph.guard(): + var_inp = fluid.dygraph.base.to_variable(np_inp) mlp = MLP("mlp") out = mlp(var_inp) dy_out = out._numpy() @@ -353,8 +353,8 @@ class TestImperative(unittest.TestCase): [10.0, 11.0, 12.0]]) np_inp = np_inp.reshape((1, 4, 3)) np_inp = np_inp.astype(np.float32) - with fluid.imperative.guard(): - var_inp = fluid.imperative.base.to_variable(np_inp) + with fluid.dygraph.guard(): + var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) simple_rnn = SimpleRNN("simple_rnn") outs, pre_hiddens = simple_rnn.forward(var_inp) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py index 62c25f734598e35b7c668d1ec1b89b5c57449f73..a92b7d62fa598a3ec9b53bade2805cc033f4b9d9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py @@ -18,11 +18,11 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.base import to_variable -class SimpleImgConvPool(fluid.imperative.Layer): +class SimpleImgConvPool(fluid.dygraph.Layer): def __init__(self, name_scope, num_channels, @@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.imperative.Layer): return x -class MNIST(fluid.imperative.Layer): +class MNIST(fluid.dygraph.Layer): def __init__(self, name_scope): super(MNIST, self).__init__(name_scope) @@ -98,12 +98,12 @@ class MNIST(fluid.imperative.Layer): return x -class TestImperativeCheckpoint(unittest.TestCase): +class TestDygraphCheckpoint(unittest.TestCase): def save_load_persistables(self): seed = 90 epoch_num = 1 - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -135,14 +135,14 @@ class TestImperativeCheckpoint(unittest.TestCase): avg_loss._backward() sgd.minimize(avg_loss) - fluid.imperative.save_persistables(mnist, "save_dir") + fluid.dygraph.save_persistables(mnist, "save_dir") mnist.clear_gradients() for param in mnist.parameters(): dy_param_init_value[param.name] = param._numpy() mnist.load_dict( - fluid.imperative.load_persistables(mnist, "save_dir")) + fluid.dygraph.load_persistables(mnist, "save_dir")) restore = mnist.parameters() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index ac123ee8db26ac23bbf9454e399a592a28c91c32..ccebd4a54727f383bd4e46ff57bfdc9381577d05 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -22,7 +22,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core from test_imperative_base import new_program_scope -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.base import to_variable # Can use Amusic dataset as the DeepCF describes. DATA_PATH = os.environ.get('DATA_PATH', '') @@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5)) NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) -class DMF(fluid.imperative.Layer): +class DMF(fluid.dygraph.Layer): def __init__(self, name_scope): super(DMF, self).__init__(name_scope) - self._user_latent = fluid.imperative.FC(self.full_name(), 256) - self._item_latent = fluid.imperative.FC(self.full_name(), 256) + self._user_latent = fluid.dygraph.FC(self.full_name(), 256) + self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._user_layers = [] self._item_layers = [] @@ -45,12 +45,12 @@ class DMF(fluid.imperative.Layer): self._user_layers.append( self.add_sublayer( 'user_layer_%d' % i, - fluid.imperative.FC( + fluid.dygraph.FC( self.full_name(), self._hid_sizes[i], act='relu'))) self._item_layers.append( self.add_sublayer( 'item_layer_%d' % i, - fluid.imperative.FC( + fluid.dygraph.FC( self.full_name(), self._hid_sizes[i], act='relu'))) def forward(self, users, items): @@ -63,18 +63,18 @@ class DMF(fluid.imperative.Layer): return fluid.layers.elementwise_mul(users, items) -class MLP(fluid.imperative.Layer): +class MLP(fluid.dygraph.Layer): def __init__(self, name_scope): super(MLP, self).__init__(name_scope) - self._user_latent = fluid.imperative.FC(self.full_name(), 256) - self._item_latent = fluid.imperative.FC(self.full_name(), 256) + self._user_latent = fluid.dygraph.FC(self.full_name(), 256) + self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._match_layers = [] self._hid_sizes = [128, 64] for i in range(len(self._hid_sizes)): self._match_layers.append( self.add_sublayer( 'match_layer_%d' % i, - fluid.imperative.FC( + fluid.dygraph.FC( self.full_name(), self._hid_sizes[i], act='relu'))) self._mat @@ -88,7 +88,7 @@ class MLP(fluid.imperative.Layer): return match_vec -class DeepCF(fluid.imperative.Layer): +class DeepCF(fluid.dygraph.Layer): def __init__(self, name_scope, num_users, num_items, matrix): super(DeepCF, self).__init__(name_scope) self._num_users = num_users @@ -103,7 +103,7 @@ class DeepCF(fluid.imperative.Layer): self._mlp = MLP(self.full_name()) self._dmf = DMF(self.full_name()) - self._match_fc = fluid.imperative.FC(self.full_name(), 1, act='sigmoid') + self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid') def forward(self, users, items): # users_emb = self._user_emb(users) @@ -191,7 +191,7 @@ def load_data(DATA_PATH): np.expand_dims(labels_np, -1), num_users, num_items, matrix -class TestImperativeDeepCF(unittest.TestCase): +class TestDygraphDeepCF(unittest.TestCase): def test_deefcf(self): seed = 90 if DATA_PATH: @@ -237,7 +237,7 @@ class TestImperativeDeepCF(unittest.TestCase): fetch_list=[loss])[0] sys.stderr.write('static loss %s\n' % static_loss) - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 6024fb5f816d10cedad36272e353704797526676..58faa1cb85af9cedb70f3a12244cfeb44e0f4f52 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -22,12 +22,12 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from test_imperative_base import new_program_scope -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.base import to_variable -class Discriminator(fluid.imperative.Layer): +class Discriminator(fluid.dygraph.Layer): def __init__(self, name_scope): super(Discriminator, self).__init__(name_scope) self._fc1 = FC(self.full_name(), size=32, act='elu') @@ -38,7 +38,7 @@ class Discriminator(fluid.imperative.Layer): return self._fc2(x) -class Generator(fluid.imperative.Layer): +class Generator(fluid.dygraph.Layer): def __init__(self, name_scope): super(Generator, self).__init__(name_scope) self._fc1 = FC(self.full_name(), size=64, act='elu') @@ -51,7 +51,7 @@ class Generator(fluid.imperative.Layer): return self._fc3(x) -class TestImperativeGAN(unittest.TestCase): +class TestDygraphGAN(unittest.TestCase): def test_gan_float32(self): seed = 90 @@ -130,7 +130,7 @@ class TestImperativeGAN(unittest.TestCase): scope.find_var(param.name).get_tensor()) dy_params = dict() - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index 2086fab5c81e241d1a49386d8285289b14364dc8..a8fb9ecfe4be16b73ac2144259f25ed3859ece7e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -22,16 +22,16 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from test_imperative_base import new_program_scope -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.base import to_variable def gen_data(): pass -class GraphConv(fluid.imperative.Layer): +class GraphConv(fluid.dygraph.Layer): def __init__(self, name_scope, in_features, out_features): super(GraphConv, self).__init__(name_scope) @@ -50,7 +50,7 @@ class GraphConv(fluid.imperative.Layer): return fluid.layers.matmul(adj, support) + self.bias -class GCN(fluid.imperative.Layer): +class GCN(fluid.dygraph.Layer): def __init__(self, name_scope, num_hidden): super(GCN, self).__init__(name_scope) self.gc = GraphConv(self.full_name(), num_hidden, 32) @@ -61,7 +61,7 @@ class GCN(fluid.imperative.Layer): return self.gc2(x, adj) -class TestImperativeGNN(unittest.TestCase): +class TestDygraphGNN(unittest.TestCase): def test_gnn_float32(self): seed = 90 @@ -115,7 +115,7 @@ class TestImperativeGNN(unittest.TestCase): static_weight = np.array( scope.find_var(model.gc.weight.name).get_tensor()) - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..5ab01839fbc20bbd3c242878c4ea23a00f7b0dca --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -0,0 +1,217 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib +import unittest +import numpy as np +import six + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.optimizer import SGDOptimizer +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.base import to_variable +from test_imperative_base import new_program_scope + + +class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, + name_scope, + num_channels, + num_filters, + filter_size, + pool_size, + pool_stride, + pool_padding=0, + pool_type='max', + global_pooling=False, + conv_stride=1, + conv_padding=0, + conv_dilation=1, + conv_groups=1, + act=None, + use_cudnn=False, + param_attr=None, + bias_attr=None): + super(SimpleImgConvPool, self).__init__(name_scope) + + self._conv2d = Conv2D( + self.full_name(), + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D( + self.full_name(), + pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) + + def forward(self, inputs): + x = self._conv2d(inputs) + x = self._pool2d(x) + return x + + +class MNIST(fluid.dygraph.Layer): + def __init__(self, name_scope): + super(MNIST, self).__init__(name_scope) + + self._simple_img_conv_pool_1 = SimpleImgConvPool( + self.full_name(), 1, 20, 5, 2, 2, act="relu") + + self._simple_img_conv_pool_2 = SimpleImgConvPool( + self.full_name(), 20, 50, 5, 2, 2, act="relu") + + pool_2_shape = 50 * 4 * 4 + SIZE = 10 + scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 + self._fc = FC(self.full_name(), + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") + + def forward(self, inputs): + x = self._simple_img_conv_pool_1(inputs) + x = self._simple_img_conv_pool_2(x) + x = self._fc(x) + return x + + +class TestImperativeMnist(unittest.TestCase): + def test_mnist_float32(self): + seed = 90 + epoch_num = 1 + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + mnist = MNIST("mnist") + sgd = SGDOptimizer(learning_rate=1e-3) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + dy_param_init_value = {} + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + dy_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(128, 1) + + img = to_variable(dy_x_data) + label = to_variable(y_data) + label._stop_gradient = True + + cost = mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + dy_out = avg_loss._numpy() + + if epoch == 0 and batch_id == 0: + for param in mnist.parameters(): + dy_param_init_value[param.name] = param._numpy() + + avg_loss._backward() + sgd.minimize(avg_loss) + mnist.clear_gradients() + + dy_param_value = {} + for param in mnist.parameters(): + dy_param_value[param.name] = param._numpy() + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + + mnist = MNIST("mnist") + sgd = SGDOptimizer(learning_rate=1e-3) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + img = fluid.layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + cost = mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + sgd.minimize(avg_loss) + + # initialize params and fetch them + static_param_init_value = {} + static_param_name_list = [] + for param in mnist.parameters(): + static_param_name_list.append(param.name) + + out = exe.run(fluid.default_startup_program(), + fetch_list=static_param_name_list) + + for i in range(len(static_param_name_list)): + static_param_init_value[static_param_name_list[i]] = out[i] + + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + static_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape([128, 1]) + + fetch_list = [avg_loss.name] + fetch_list.extend(static_param_name_list) + out = exe.run( + fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_param_value = {} + static_out = out[0] + for i in range(1, len(out)): + static_param_value[static_param_name_list[i - 1]] = out[ + i] + + self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) + + for key, value in six.iteritems(static_param_init_value): + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + + self.assertTrue(np.allclose(static_out, dy_out)) + + for key, value in six.iteritems(static_param_value): + self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 5b3c250501386a7854313218f5ea338281824252..8b659a3e08e381dd6f55b666d9f5f1b172a51930 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -22,131 +22,71 @@ import six import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.optimizer import SGDOptimizer, Adam +from paddle.fluid.dygraph.nn import FC +from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope -class SimpleImgConvPool(fluid.imperative.Layer): - def __init__(self, - name_scope, - num_channels, - num_filters, - filter_size, - pool_size, - pool_stride, - pool_padding=0, - pool_type='max', - global_pooling=False, - conv_stride=1, - conv_padding=0, - conv_dilation=1, - conv_groups=1, - act=None, - use_cudnn=False, - param_attr=None, - bias_attr=None): - super(SimpleImgConvPool, self).__init__(name_scope) - - self._conv2d = Conv2D( - self.full_name(), - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - self.full_name(), - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) +class MLP(fluid.dygraph.Layer): + def __init__(self, name_scope, param_attr=None, bias_attr=None): + super(MLP, self).__init__(name_scope) - def forward(self, inputs): - x = self._conv2d(inputs) - x = self._pool2d(x) - return x - - -class MNIST(fluid.imperative.Layer): - def __init__(self, name_scope): - super(MNIST, self).__init__(name_scope) + self._fc1 = FC(self.full_name(), 10) + self._fc2 = FC(self.full_name(), 10) - self._simple_img_conv_pool_1 = SimpleImgConvPool( - self.full_name(), 1, 20, 5, 2, 2, act="relu") - - self._simple_img_conv_pool_2 = SimpleImgConvPool( - self.full_name(), 20, 50, 5, 2, 2, act="relu") + def forward(self, inputs): + y = self._fc1(inputs) + y = self._fc2(y) + return y - pool_2_shape = 50 * 4 * 4 - SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = FC(self.full_name(), - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") - def forward(self, inputs): - x = self._simple_img_conv_pool_1(inputs) - x = self._simple_img_conv_pool_2(x) - x = self._fc(x) - return x +class TestImperativeOptimizerBase(unittest.TestCase): + def setUp(self): + self.batch_num = 20 + def get_optimizer(self): + raise NotImplementedError() -class TestImperativeMnist(unittest.TestCase): - def test_mnist_float32(self): + def _check_mlp(self): seed = 90 - epoch_num = 1 - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - mnist = MNIST("mnist") - sgd = SGDOptimizer(learning_rate=1e-3) + mlp = MLP('mlp') + optimizer = self.get_optimizer() train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=128, drop_last=True) dy_param_init_value = {} - for epoch in range(epoch_num): - for batch_id, data in enumerate(train_reader()): - dy_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(128, 1) - - img = to_variable(dy_x_data) - label = to_variable(y_data) - label._stop_gradient = True - - cost = mnist(img) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - dy_out = avg_loss._numpy() - - if epoch == 0 and batch_id == 0: - for param in mnist.parameters(): - dy_param_init_value[param.name] = param._numpy() - - avg_loss._backward() - sgd.minimize(avg_loss) - mnist.clear_gradients() - - dy_param_value = {} - for param in mnist.parameters(): - dy_param_value[param.name] = param._numpy() + for batch_id, data in enumerate(train_reader()): + if batch_id >= self.batch_num: + break + + dy_x_data = np.array( + [x[0].reshape(1, 28, 28) for x in data]).astype('float32') + y_data = np.array([x[1] for x in data]).astype('int64').reshape( + 128, 1) + + img = to_variable(dy_x_data) + label = to_variable(y_data) + label._stop_gradient = True + + cost = mlp(img) + avg_loss = fluid.layers.reduce_mean(cost) + dy_out = avg_loss._numpy() + + if batch_id == 0: + for param in mlp.parameters(): + dy_param_init_value[param.name] = param._numpy() + + avg_loss._backward() + optimizer.minimize(avg_loss) + mlp.clear_gradients() + dy_param_value = {} + for param in mlp.parameters(): + dy_param_value[param.name] = param._numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed @@ -155,23 +95,22 @@ class TestImperativeMnist(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - mnist = MNIST("mnist") - sgd = SGDOptimizer(learning_rate=1e-3) + mlp = MLP('mlp') + optimizer = self.get_optimizer() train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=128, drop_last=True) img = fluid.layers.data( name='pixel', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - cost = mnist(img) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - sgd.minimize(avg_loss) + cost = mlp(img) + avg_loss = fluid.layers.reduce_mean(cost) + optimizer.minimize(avg_loss) # initialize params and fetch them static_param_init_value = {} static_param_name_list = [] - for param in mnist.parameters(): + for param in mlp.parameters(): static_param_name_list.append(param.name) out = exe.run(fluid.default_startup_program(), @@ -180,29 +119,26 @@ class TestImperativeMnist(unittest.TestCase): for i in range(len(static_param_name_list)): static_param_init_value[static_param_name_list[i]] = out[i] - for epoch in range(epoch_num): - for batch_id, data in enumerate(train_reader()): - static_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape([128, 1]) - - fetch_list = [avg_loss.name] - fetch_list.extend(static_param_name_list) - out = exe.run( - fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) - - static_param_value = {} - static_out = out[0] - for i in range(1, len(out)): - static_param_value[static_param_name_list[i - 1]] = out[ - i] - - self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) + for batch_id, data in enumerate(train_reader()): + if batch_id >= self.batch_num: + break + + static_x_data = np.array( + [x[0].reshape(1, 28, 28) for x in data]).astype('float32') + y_data = np.array([x[1] for x in data]).astype('int64').reshape( + [128, 1]) + + fetch_list = [avg_loss.name] + fetch_list.extend(static_param_name_list) + out = exe.run(fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_param_value = {} + static_out = out[0] + for i in range(1, len(out)): + static_param_value[static_param_name_list[i - 1]] = out[i] for key, value in six.iteritems(static_param_init_value): self.assertTrue(np.allclose(value, dy_param_init_value[key])) @@ -210,7 +146,92 @@ class TestImperativeMnist(unittest.TestCase): self.assertTrue(np.allclose(static_out, dy_out)) for key, value in six.iteritems(static_param_value): - self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) + self.assertTrue(np.allclose(value, dy_param_value[key])) + + +class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + bd = [3, 6, 9] + optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)])) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_adam(self): + self._check_mlp() + + +class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay( + learning_rate=0.1, decay_steps=5, cycle=self.cycle)) + return optimizer + + def test_sgd_cycle(self): + self.cycle = True + self._check_mlp() + + def test_sgd(self): + self.cycle = False + self._check_mlp() + + +class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay( + learning_rate=0.1, step_each_epoch=10000, epochs=120)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay( + d_model=512, warmup_steps=8000)) + return optimizer + + def test_sgd(self): + self._check_mlp() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 460ba65a48c863315cda4847aee1b4e2366bba96..998c675815ece9236c819bffc4a4b74d44ff790e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -16,17 +16,17 @@ from __future__ import print_function import unittest import paddle.fluid as fluid -from paddle.fluid.imperative.nn import Embedding +from paddle.fluid.dygraph.nn import Embedding import paddle.fluid.framework as framework from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope import numpy as np import six from paddle.fluid.backward import append_backward -class SimpleLSTMRNN(fluid.imperative.Layer): +class SimpleLSTMRNN(fluid.dygraph.Layer): def __init__(self, name_scope, hidden_size, @@ -131,7 +131,7 @@ class SimpleLSTMRNN(fluid.imperative.Layer): return real_res, last_hidden, last_cell -class PtbModel(fluid.imperative.Layer): +class PtbModel(fluid.dygraph.Layer): def __init__(self, name_scope, hidden_size, @@ -214,7 +214,7 @@ class PtbModel(fluid.imperative.Layer): return loss, last_hidden, last_cell -class TestImperativePtbRnn(unittest.TestCase): +class TestDygraphPtbRnn(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -224,7 +224,7 @@ class TestImperativePtbRnn(unittest.TestCase): init_scale = 0.1 batch_size = 4 - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index ab9298890bf69774fd842ec202d833be0a57f7ad..1d786d584632769e4318bcdeb24ef7ef8ea18597 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -21,8 +21,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC -from paddle.fluid.imperative.base import to_variable +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC +from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope batch_size = 8 @@ -57,7 +57,7 @@ def optimizer_setting(params): lr = [] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] optimizer = fluid.optimizer.SGD(learning_rate=0.01) - # TODO(minqiyang): Add learning rate scheduler support to imperative mode + # TODO(minqiyang): Add learning rate scheduler support to dygraph mode # optimizer = fluid.optimizer.Momentum( # learning_rate=params["lr"], # learning_rate=fluid.layers.piecewise_decay( @@ -68,7 +68,7 @@ def optimizer_setting(params): return optimizer -class ConvBNLayer(fluid.imperative.Layer): +class ConvBNLayer(fluid.dygraph.Layer): def __init__(self, name_scope, num_channels, @@ -99,7 +99,7 @@ class ConvBNLayer(fluid.imperative.Layer): return y -class BottleneckBlock(fluid.imperative.Layer): +class BottleneckBlock(fluid.dygraph.Layer): def __init__(self, name_scope, num_channels, @@ -156,7 +156,7 @@ class BottleneckBlock(fluid.imperative.Layer): return layer_helper.append_activation(y) -class ResNet(fluid.imperative.Layer): +class ResNet(fluid.dygraph.Layer): def __init__(self, name_scope, layers=50, class_dim=102): super(ResNet, self).__init__(name_scope) @@ -226,13 +226,13 @@ class ResNet(fluid.imperative.Layer): return y -class TestImperativeResnet(unittest.TestCase): +class TestDygraphResnet(unittest.TestCase): def test_resnet_float32(self): seed = 90 batch_size = train_parameters["batch_size"] batch_num = 20 - with fluid.imperative.guard(): + with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py index b06d3e8894072943b06456340f928cda260763c3..3bdf3349730b0c9916449cfe0658d5a3c88834ed 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import paddle.fluid as fluid -from paddle.fluid.imperative import Embedding, LayerNorm, FC, to_variable, Layer, guard +from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard from test_imperative_base import new_program_scope from paddle.fluid import core import numpy as np @@ -623,7 +623,7 @@ class PrepareEncoderDecoderLayer(Layer): initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), trainable=False)) - # use in imperative_mode to fit different length batch + # use in dygraph_mode to fit different length batch # self._pos_emb._w = to_variable( # position_encoding_init(self._src_max_len, self._src_emb_dim)) @@ -946,7 +946,7 @@ class TransFormer(Layer): return sum_cost, avg_cost, predict, token_num -class TestImperativeTransformer(unittest.TestCase): +class TestDygraphTransformer(unittest.TestCase): def test_transformer_float32(self): seed = 90 with guard(): diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d0212d177e6f1c60b916a0cb0eef7cd7f54a3585 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division + +import unittest +import numpy as np +from op_test import OpTest + + +def kldiv_loss(x, target, reduction): + output = target * (np.log(target) - x) + loss = np.where(target >= 0, output, np.zeros_like(x)) + + if reduction == "batchmean": + return loss.sum() / x.shape[0] + if reduction == "mean": + return loss.mean() + if reduction == "sum": + return loss.sum() + + return loss + + +class TestKLDivLossOp(OpTest): + def setUp(self): + self.initTestCase() + self.op_type = 'kldiv_loss' + x = np.random.uniform(-10, 10, self.x_shape).astype('float32') + target = np.random.uniform(-10, 10, self.x_shape).astype('float32') + + self.attrs = {"reduction": self.reduction} + + self.inputs = { + 'X': x, + 'Target': target, + } + loss = kldiv_loss(x, target, self.reduction) + self.outputs = {'Loss': loss.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ['X'], 'Loss', no_grad_set=set(["Target"]), max_relative_error=0.06) + + def initTestCase(self): + self.x_shape = (2, 5, 5) + self.reduction = 'batchmean' + + +class TestKLDivLossOp2(TestKLDivLossOp): + def initTestCase(self): + self.x_shape = (3, 2, 7, 7) + self.reduction = 'none' + + +class TestKLDivLossOp3(TestKLDivLossOp): + def initTestCase(self): + self.x_shape = (2, 3, 5, 7, 9) + self.reduction = 'mean' + + +class TestKLDivLossOp4(TestKLDivLossOp): + def initTestCase(self): + self.x_shape = (5, 7) + self.reduction = 'sum' + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 7fd9617cc7687a5a553ed22cfed560aef8058496..e92ece7acb41b5a63adaae8edba78486ca3adcf8 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -29,8 +29,8 @@ from paddle.fluid import core from paddle.fluid.initializer import Constant import paddle.fluid.layers as layers from test_imperative_base import new_program_scope -from paddle.fluid.imperative import nn -from paddle.fluid.imperative import base +from paddle.fluid.dygraph import nn +from paddle.fluid.dygraph import base class LayerTest(unittest.TestCase): @@ -68,7 +68,7 @@ class LayerTest(unittest.TestCase): @contextlib.contextmanager def dynamic_graph(self, force_to_use_cpu=False): - with fluid.imperative.guard( + with fluid.dygraph.guard( self._get_place(force_to_use_cpu=force_to_use_cpu)): fluid.default_startup_program().random_seed = self.seed fluid.default_main_program().random_seed = self.seed @@ -845,7 +845,7 @@ class TestBook(unittest.TestCase): with program_guard(program): data = layers.data(name='data', shape=[10], dtype='float32') hid = layers.fc(input=data, size=20) - self.assertIsNotNone(layers.softmax(hid)) + self.assertIsNotNone(layers.softmax(hid, axis=1)) print(str(program)) def test_space_to_depth(self): @@ -1591,6 +1591,23 @@ class TestBook(unittest.TestCase): out = layers.spectral_norm(weight, dim=1, power_iters=1) self.assertIsNotNone(out) + def test_kldiv_loss(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[32, 128, 128], dtype="float32") + target = layers.data( + name='target', shape=[32, 128, 128], dtype="float32") + loss = layers.kldiv_loss(x=x, target=target, reduction='batchmean') + self.assertIsNotNone(loss) + + print(str(program)) + + def test_temporal_shift(self): + program = Program() + with program_guard(program): + x = layers.data(name="X", shape=[16, 4, 4], dtype="float32") + out = layers.temporal_shift(x, seg_num=4, shift_ratio=0.2) + self.assertIsNotNone(out) print(str(program)) def test_shuffle_channel(self): diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index 5212d97dfbc16e463e5f68456a3d735ac6679ae1..2108c2a9f53ac2b81d2e4477c0f1d038624bc05b 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -120,9 +120,9 @@ class TestLearningRateDecay(unittest.TestCase): self.assertAlmostEqual( python_decayed_lr, lr_val[0], - msg='Failed fn is {0}, Python result is {1}, Fluid result is {2}'. + msg='Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}'. format(python_decay_fn.__name__, - str(python_decayed_lr), str(lr_val[0]))) + str(step), str(python_decayed_lr), str(lr_val[0]))) def test_decay(self): common_kwargs_true = { @@ -164,12 +164,53 @@ class TestLearningRateDecay(unittest.TestCase): ] for py_decay_fn, fluid_decay_fn, kwargs in decay_fns: - print("decay_fn=" + py_decay_fn.__name__ + " kwargs=" + str(kwargs)) + print("class=" + self.__class__.__name__ + "decay_fn=" + + py_decay_fn.__name__ + " kwargs=" + str(kwargs)) main_program = framework.Program() startup_program = framework.Program() with framework.program_guard(main_program, startup_program): self.check_decay(py_decay_fn, fluid_decay_fn, kwargs) +def linear_lr_warmup(global_step, warmup_steps, start_lr, end_lr): + linear_step = end_lr - start_lr + decayed_lr = start_lr + linear_step * (global_step / warmup_steps) + return decayed_lr + + +class TestLinearWamrupLearningRateDecay(TestLearningRateDecay): + def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn, + kwargs): + main_prog = fluid.Program() + startup_prog = fluid.Program() + + warmup_steps = 10 + start_lr = 1. / 3. + end_lr = 0.1 + + with fluid.program_guard(main_prog, startup_prog): + decayed_lr = layers.linear_lr_warmup( + fluid_decay_fn(**kwargs), warmup_steps, start_lr, end_lr) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + for step in range(20): + lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) + if step < warmup_steps: + python_decayed_lr = linear_lr_warmup( + float(step), warmup_steps, start_lr, end_lr) + else: + python_decayed_lr = python_decay_fn( + global_step=float(step), **kwargs) + self.assertAlmostEqual( + python_decayed_lr, + lr_val[0], + msg='Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}'. + format(python_decay_fn.__name__, + str(step), str(python_decayed_lr), str(lr_val[0]))) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index cb1f5fdaee8253bbb3df3063ecca9859682f8bb0..0c5d3228f8345aeccc45f140a1ed97616a656d48 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -177,6 +177,9 @@ class TestMNIST(TestParallelExecutorBase): for use_fast_executor in (False, True): self.check_batchnorm_fc_convergence(use_cuda, use_fast_executor) + # FIXME(wuyi): should checkout why this fails when merging + # https://github.com/PaddlePaddle/Paddle/pull/16545 + @unittest.skip("should fix this later") def test_batchnorm_fc_with_new_strategy(self): # NOTE: the computation result of nccl_reduce is non-deterministic, # related issue: https://github.com/NVIDIA/nccl/issues/157 diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 5c56de6779d238064f03a65b54f3c73a77119f60..8b071260285a1ff50e3c49ec0ac84f388fff97bf 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -31,6 +31,9 @@ class TestSoftmaxOp(OpTest): def get_x_shape(self): return [10, 10] + def get_axis(self): + return -1 + def setUp(self): self.op_type = "softmax" self.use_cudnn = False @@ -38,15 +41,15 @@ class TestSoftmaxOp(OpTest): self.dtype = np.float32 self.init_kernel_type() self.shape = self.get_x_shape() + self.axis = self.get_axis() x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) - out = np.apply_along_axis(stable_softmax, 1, - x.reshape([-1, self.shape[-1]])) - out = out.reshape(self.shape) + out = np.apply_along_axis(stable_softmax, self.axis, x) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} self.attrs = { + 'axis': self.axis, 'use_cudnn': self.use_cudnn, 'use_mkldnn': self.use_mkldnn } @@ -76,6 +79,38 @@ class TestSoftmaxOp2(TestSoftmaxOp): return [2, 3, 4, 5] +class TestSoftmaxOp3(TestSoftmaxOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 0 + + +class TestSoftmaxOp4(TestSoftmaxOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 1 + + +class TestSoftmaxOp5(TestSoftmaxOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 2 + + +class TestSoftmaxOp5(TestSoftmaxOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 3 + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp(TestSoftmaxOp): @@ -90,6 +125,16 @@ class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp): return [2, 3, 4, 5] +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 3 + + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxFP16Op(TestSoftmaxOp): diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d469388ca079b6825c82c447cf574921d7da6f25 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py @@ -0,0 +1,81 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division + +import unittest +import numpy as np +from op_test import OpTest + +from paddle.fluid import core + + +def temporal_shift(x, seg_num, shift_ratio): + shape = x.shape + reshape_x = x.reshape((-1, seg_num, shape[1], shape[2], shape[3])) + pad_x = np.pad(reshape_x, ((0, 0), (1, 1), (0, 0), (0, 0), (0, 0)), + 'constant') + c1 = int(shape[1] * shift_ratio) + c2 = int(shape[1] * 2 * shift_ratio) + slice1 = pad_x[:, :seg_num, :c1, :, :] + slice2 = pad_x[:, 2:seg_num + 2, c1:c2, :, :] + slice3 = pad_x[:, 1:seg_num + 1, c2:, :, :] + concat_x = np.concatenate([slice1, slice2, slice3], axis=2) + return concat_x.reshape(shape) + + +class TestTemporalShift(OpTest): + def setUp(self): + self.initTestCase() + self.op_type = 'temporal_shift' + x = np.random.random(self.x_shape).astype('float32') + + self.attrs = { + "seg_num": self.seg_num, + "shift_ratio": self.shift_ratio, + } + + self.inputs = {"X": x, } + + output = temporal_shift(x, self.seg_num, self.shift_ratio) + self.outputs = {"Out": output} + + def test_check_output(self): + self.check_output() + + def test_check_grad_ignore_uv(self): + self.check_grad(['X'], 'Out') + + def initTestCase(self): + self.x_shape = (6, 4, 4, 4) + self.seg_num = 3 + self.shift_ratio = 0.25 + + +class TestTemporalShift2(TestTemporalShift): + def initTestCase(self): + self.x_shape = (4, 9, 7, 7) + self.seg_num = 2 + self.shift_ratio = 0.2 + + +class TestTemporalShift3(TestTemporalShift): + def initTestCase(self): + self.x_shape = (3, 10, 5, 5) + self.seg_num = 1 + self.shift_ratio = 0.3 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index 076ee3baf96ab3c16f3ed9a3b9a15e2eb2aaed77..35e4af2d098dcb0a4ac63e2b65982bfc9dabf803 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -19,7 +19,6 @@ from paddle.fluid.framework import default_main_program, Program, convert_np_dty import paddle.fluid as fluid import paddle.fluid.core as core import numpy as np -from test_imperative_base import new_program_scope class TestVariable(unittest.TestCase): @@ -62,7 +61,7 @@ class TestVariable(unittest.TestCase): name='step_scopes', type=core.VarDesc.VarType.STEP_SCOPES) self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type) - def _test_slice(self): + def _test_slice(self, place): b = default_main_program().current_block() w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) @@ -84,7 +83,6 @@ class TestVariable(unittest.TestCase): self.assertEqual(0, nw.lod_level) - place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): exe = fluid.Executor(place) @@ -101,10 +99,23 @@ class TestVariable(unittest.TestCase): var6 = var[1, 1:, 1:] var7 = var[1, ..., 1:] var8 = var[1, ...] + var_reshape = fluid.layers.reshape(var, [3, -1, 3]) + var9 = var_reshape[1, ..., 2] + var10 = var_reshape[:, :, -1] + + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + y = fluid.layers.fc(input=x, size=1, act=None) + var11 = y[:, 0] + feeder = fluid.DataFeeder(place=place, feed_list=[x]) + data = [] + data.append((np.random.randint(10, size=[13]).astype('float32'))) + exe.run(fluid.default_startup_program()) + local_out = exe.run(main, + feed=feeder.feed([data]), fetch_list=[ var, var1, var2, var3, var4, var5, var6, - var7, var8 + var7, var8, var9, var10, var11 ]) self.assertTrue((np.array(local_out[1]) == np.array(tensor_array[ @@ -123,38 +134,16 @@ class TestVariable(unittest.TestCase): 1, ..., 1:])).all()) self.assertTrue((np.array(local_out[8]) == np.array(tensor_array[ 1, ...])).all()) + self.assertEqual(local_out[9].shape, (1, 3, 1)) + self.assertEqual(local_out[10].shape, (3, 3, 1)) + self.assertEqual(local_out[11].shape, (1, 1)) def test_slice(self): - self._test_slice() - - -class TestVariableImperative(unittest.TestCase): - def _test_slice(self): - b = default_main_program().current_block() - w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) - - for i in range(3): - nw = w[i] - self.assertEqual([1, 100, 100], nw.shape) - - nw = w[:] - self.assertEqual([784, 100, 100], nw.shape) - - nw = w[:, :, :] - self.assertEqual([784, 100, 100], nw.shape) - - nw = w[::2, ::2, :] - self.assertEqual([392, 50, 100], nw.shape) - - nw = w[::-2, ::-2, :] - self.assertEqual([392, 50, 100], nw.shape) - - nw = w[0::-2, 0::-2, :] - self.assertEqual([1, 1, 100], nw.shape) + place = fluid.CPUPlace() + self._test_slice(place) - def test_slice(self): - with fluid.imperative.guard(): - self._test_slice() + if core.is_compiled_with_cuda(): + self._test_slice(core.CUDAPlace(0)) if __name__ == '__main__': diff --git a/python/setup.py.in b/python/setup.py.in index 9f87f5644fc969f3f55fd08689f3e2bbaf36dc39..75e821582f49f93cb41e4254edd11cb782d18cc7 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -102,7 +102,7 @@ packages=['paddle', 'paddle.reader', 'paddle.distributed', 'paddle.fluid', - 'paddle.fluid.imperative', + 'paddle.fluid.dygraph', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', 'paddle.fluid.distributed', @@ -119,6 +119,7 @@ packages=['paddle', 'paddle.fluid.contrib.slim.quantization', 'paddle.fluid.contrib.slim.distillation', 'paddle.fluid.contrib.utils', + 'paddle.fluid.contrib.extend_optimizer', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details'] diff --git a/tools/print_signatures.py b/tools/print_signatures.py index d32b247342cc0c37b7bcff7b676cb47a4f429dfd..6a262529b5cac7e596e65d23de6cc4b5d720cacb 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -28,7 +28,7 @@ import hashlib member_dict = collections.OrderedDict() -experimental_namespace = {"paddle.fluid.imperative"} +experimental_namespace = {"paddle.fluid.dygraph"} def md5(doc):