Commit 75994078 authored by H heqiaozhi

Merge remote-tracking branch 'upstream/develop' into dev

test=develop
@@ -71,7 +71,8 @@ option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plan
 option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization." OFF)
-option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
+option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
+option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface" OFF)
 option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
 option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
......
@@ -221,6 +221,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
     -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
     -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
     -DCMAKE_INSTALL_LIBDIR=lib
+    -DBUILD_SHARED_LIBS=OFF
 CMAKE_CACHE_ARGS
     -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
......
@@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d
 paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
 paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
 paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
+paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
 paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
@@ -117,6 +118,8 @@ paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name
 paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4'))
 paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c'))
 paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca'))
+paddle.fluid.layers.reduce_all (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '646ca4d4a2cc16084f59de44b6927eca'))
+paddle.fluid.layers.reduce_any (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'f36661060aeeaf6c6b1331e41b3726fa'))
 paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
 paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
 paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
@@ -124,7 +127,7 @@ paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed
 paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
 paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
 paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
-paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990'))
+paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '35c6a241bcc1a1fc89508860d82ad62b'))
 paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9'))
 paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32'))
 paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab'))
@@ -155,10 +158,10 @@ paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon'
 paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97'))
 paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1'))
 paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d'))
-paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', '7a1966d7c3a48f1fc0881cdaf5d83b0b'))
+paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'd1b08c11bb9277386fcf6ae70b6622d1'))
 paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7'))
-paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7'))
-paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d'))
+paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'c45591fbc4f64a178fbca219e1546a58'))
+paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', 'ae6d73cdc7f3a138d8a338ecdb33c1ae'))
 paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2'))
 paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342'))
 paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b'))
@@ -203,6 +206,7 @@ paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'sha
 paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860'))
 paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a'))
 paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8'))
+paddle.fluid.layers.rank (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'ee1386c42ecc8f424fe3fb21862fefc2'))
 paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42'))
 paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012'))
 paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25'))
@@ -235,7 +239,7 @@ paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], vararg
 paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec'))
 paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
 paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329'))
-paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', 'ad669cdf83e72a69ebc5ed79e36486de'))
+paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '731b21c62a4add60a33bd76d802ffc5c'))
 paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
 paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', '88046160ef4bbd28f18fa6484d95b75c'))
 paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
@@ -272,6 +276,7 @@ paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, de
 paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
 paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
 paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb'))
+paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '495e21e9a848c2d075a102802fc67756'))
 paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -361,8 +366,7 @@ paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_st
 paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
 paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
 paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
-paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
-paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
+paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', 'f8b2727bccf0f368c997d7cf05847e49'))
 paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565'))
 paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
@@ -72,7 +72,6 @@ bool DataFeed::PickOneFile(std::string* filename) {
   }
   VLOG(3) << "file_idx_=" << *file_idx_;
   *filename = filelist_[(*file_idx_)++];
-  // LOG(ERROR) << "pick file:" << *filename;
   return true;
 }
@@ -466,6 +465,17 @@ void MultiSlotDataFeed::Init(
     if (slot.is_used()) {
       use_slots_.push_back(all_slots_[i]);
       use_slots_is_dense_.push_back(slot.is_dense());
+      std::vector<int> local_shape;
+      if (slot.is_dense()) {
+        // for batch size holder if is_dense
+        if (slot.shape(0) > 0) {
+          local_shape.push_back(0);
+        }
+      }
+      for (size_t i = 0; i < slot.shape_size(); ++i) {
+        local_shape.push_back(slot.shape(i));
+      }
+      use_slots_shape_.push_back(local_shape);
     }
   }
   feed_vec_.resize(use_slots_.size());
@@ -752,8 +762,8 @@ void MultiSlotDataFeed::PutToFeedVec(
       LoD data_lod{offset};
       feed_vec_[i]->set_lod(data_lod);
       if (use_slots_is_dense_[i]) {
-        int dim = total_instance / batch_size_;
-        feed_vec_[i]->Resize({batch_size_, dim});
+        use_slots_shape_[i][0] = batch_size_;
+        feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
       }
     }
 #endif
@@ -785,6 +795,16 @@ void MultiSlotInMemoryDataFeed::Init(
     if (slot.is_used()) {
       use_slots_.push_back(all_slots_[i]);
       use_slots_is_dense_.push_back(slot.is_dense());
+      std::vector<int> local_shape;
+      if (slot.is_dense()) {
+        if (slot.shape(0) > 0) {
+          local_shape.push_back(0);
+        }
+      }
+      for (size_t i = 0; i < slot.shape_size(); ++i) {
+        local_shape.push_back(slot.shape(i));
+      }
+      use_slots_shape_.push_back(local_shape);
     }
   }
   feed_vec_.resize(use_slots_.size());
@@ -940,8 +960,8 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
       LoD data_lod{offset};
       feed_vec_[i]->set_lod(data_lod);
       if (use_slots_is_dense_[i]) {
-        int dim = total_instance / batch_size_;
-        feed_vec_[i]->Resize({batch_size_, dim});
+        use_slots_shape_[i][0] = batch_size_;
+        feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
      }
    }
 #endif
......
@@ -142,6 +142,7 @@ class DataFeed {
   // object)
   std::vector<std::string> all_slots_;
   std::vector<std::string> all_slots_type_;
+  std::vector<std::vector<int>> use_slots_shape_;
   std::vector<int>
       use_slots_index_;  // -1: not used; >=0: the index of use_slots_
......
@@ -19,6 +19,7 @@ message Slot {
   required string type = 2;
   optional bool is_dense = 3 [ default = false ];
   optional bool is_used = 4 [ default = false ];
+  repeated int32 shape = 5;  // we can define N-D Tensor
 }
 message MultiSlotDesc { repeated Slot slots = 1; }
......
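Note on the data-feed change above: the new `shape` field lets a dense slot declare its tensor layout up front. `Init` seeds `use_slots_shape_` with a `0` placeholder for the batch dimension when the first declared dim is positive, and `PutToFeedVec` later patches that placeholder with the real `batch_size_` before resizing. A minimal standalone sketch of the pattern, with a hypothetical `SlotDesc` standing in for the parsed proto:

```cpp
#include <iostream>
#include <vector>

// Hypothetical stand-in for a parsed Slot proto carrying the new `shape` field.
struct SlotDesc {
  bool is_dense;
  std::vector<int> shape;  // declared dims, e.g. {8, 128}
};

// Mirrors the Init() logic above: prepend a 0 batch-size holder for dense
// slots whose first declared dim is positive, then copy the declared dims.
std::vector<int> BuildLocalShape(const SlotDesc& slot) {
  std::vector<int> local_shape;
  if (slot.is_dense && !slot.shape.empty() && slot.shape[0] > 0) {
    local_shape.push_back(0);  // placeholder, patched once per batch
  }
  local_shape.insert(local_shape.end(), slot.shape.begin(), slot.shape.end());
  return local_shape;
}

int main() {
  SlotDesc dense{true, {8, 128}};
  std::vector<int> shape = BuildLocalShape(dense);
  shape[0] = 32;  // what use_slots_shape_[i][0] = batch_size_ does per batch
  for (int d : shape) std::cout << d << ' ';  // prints: 32 8 128
  std::cout << '\n';
}
```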
@@ -150,6 +150,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
       AppendPass("runtime_context_cache_pass");
     }
+    if (strategy_.cache_expected_kernel_) {
+      VLOG(10) << "Add expected_kernel_cache_pass";
+      AppendPass("expected_kernel_cache_pass");
+    }
     AppendMultiDevPass(strategy_);
     if (strategy_.fuse_all_reduce_ops_) {
@@ -337,3 +342,4 @@ USE_PASS(fuse_adam_op_pass);
 USE_PASS(fuse_sgd_op_pass);
 USE_PASS(fuse_all_reduce_op_pass);
 USE_PASS(runtime_context_cache_pass);
+USE_PASS(expected_kernel_cache_pass);
@@ -83,11 +83,11 @@ struct BuildStrategy {
   bool sync_batch_norm_{false};
-  bool memory_optimize_{true};
-  // TODO(dzhwinter):
-  // make enable_inplace, memory_optimize_
-  // memory_early_delete_ true by default
-  bool enable_inplace_{true};
+  // FIXME(liuwei1031) disable memory_optimzie and enable_inplace in 1.4
+  // to open them by default, we need to solve the fetch variable issue
+  bool memory_optimize_{false};
+  bool enable_inplace_{false};
   bool enable_sequential_execution_{false};
@@ -108,6 +108,7 @@ struct BuildStrategy {
   bool remove_unnecessary_lock_{true};
   bool cache_runtime_context_{false};
+  bool cache_expected_kernel_{true};
   // NOTE:
   // Before you add new options, think if it's a general strategy that works
......
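Note: `cache_expected_kernel_` follows the same gating pattern as `cache_runtime_context_`, a strategy bit that decides whether its pass is appended when the executor graph is built, while `memory_optimize_` and `enable_inplace_` become opt-in. A simplified sketch of that gating (the `Strategy` and `BuildPassList` names are illustrative, not Paddle's API):

```cpp
#include <iostream>
#include <string>
#include <vector>

// Illustrative subset of BuildStrategy: caching passes are opt-out,
// memory passes are opt-in after this commit.
struct Strategy {
  bool cache_runtime_context_ = false;
  bool cache_expected_kernel_ = true;
  bool memory_optimize_ = false;
  bool enable_inplace_ = false;
};

std::vector<std::string> BuildPassList(const Strategy& s) {
  std::vector<std::string> passes;
  if (s.cache_runtime_context_) passes.push_back("runtime_context_cache_pass");
  if (s.cache_expected_kernel_) passes.push_back("expected_kernel_cache_pass");
  if (s.memory_optimize_) passes.push_back("memory_optimize_pass");
  if (s.enable_inplace_) passes.push_back("inplace_pass");
  return passes;
}

int main() {
  Strategy s;  // defaults: only expected_kernel_cache_pass is appended
  for (const auto& p : BuildPassList(s)) std::cout << p << '\n';
}
```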
@@ -305,6 +305,12 @@ void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
     VLOG(4) << "Try to inplace " << in_var_name << " with " << out_var_name;
+    if (var_nodes_[in_var_name].back() != in_node) {
+      VLOG(4) << "SKIP since " << in_var_name
+              << " is also used as output by other ops";
+      continue;
+    }
     bool can_replace = true;
     if (in_var_name == out_var_name) {
       can_replace = false;
@@ -527,6 +533,9 @@ void GraphView::Build(ir::Graph* g) {
   };
   for (auto& node : g->Nodes()) {
     if (!node->IsOp()) continue;
+    // avoid optimize the variable used in sub-blocks
+    if (OpHasSubBlock(node->Op())) update_skip_set(node);
     if (node->Name() == "send") update_skip_set(node);
     if (node->Name() == "recv") update_skip_set(node);
     if (node->Name() == "prefetch") update_skip_set(node);
......
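Note: both new guards keep the inplace pass away from variables that are still live elsewhere; the first skips an input whose node is not the newest version of that variable, the second skips anything touched by an op that owns a sub-block. A toy version of the first check, assuming a map from variable name to its ordered versions (int ids standing in for graph nodes):

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Maps a variable name to the ordered list of nodes defining it; back() is
// the newest version (mirrors var_nodes_ in the pass).
using VarVersions = std::map<std::string, std::vector<int>>;

// Only the newest version of a variable may be reused in place; an older
// version is still consumed or produced by some other op.
bool CanInplace(const VarVersions& var_nodes, const std::string& name,
                int candidate_node) {
  auto it = var_nodes.find(name);
  return it != var_nodes.end() && !it->second.empty() &&
         it->second.back() == candidate_node;
}

int main() {
  VarVersions vars{{"x", {1, 4}}};  // node 4 redefined "x" after node 1
  std::cout << CanInplace(vars, "x", 1) << '\n';  // 0: stale version, skip
  std::cout << CanInplace(vars, "x", 4) << '\n';  // 1: newest version, ok
}
```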
@@ -233,6 +233,12 @@ struct OpInfoFiller<T, kNoNeedBufferVarsInference> {
   }
 };
+// A fake OpInfoFiller of void
+template <>
+struct OpInfoFiller<void, kUnknown> {
+  void operator()(const char* op_type, OpInfo* info) const {}
+};
 }  // namespace details
 }  // namespace framework
......
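Note: the `void` specialization acts as a do-nothing terminator so generic registration code can always instantiate a filler, even for an unused slot. A minimal single-parameter analogue (the real filler also takes an enum tag such as `kUnknown`):

```cpp
#include <iostream>

struct OpInfo {
  int flags = 0;
};

template <typename T>
struct Filler {
  void operator()(const char* op_type, OpInfo* info) const {
    info->flags |= 1;  // pretend T contributes some registration data
    std::cout << "filled " << op_type << '\n';
  }
};

// No-op specialization: lets generic code "fill" with void safely.
template <>
struct Filler<void> {
  void operator()(const char*, OpInfo*) const {}
};

int main() {
  OpInfo info;
  Filler<int>{}("mul", &info);   // real filler runs
  Filler<void>{}("mul", &info);  // compiles and does nothing
}
```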
@@ -21,40 +21,40 @@ namespace framework {
 void DownpourWorker::Initialize(const TrainerDesc& desc) {
   param_ = desc.downpour_param();
-  for (size_t i = 0; i < param_.sparse_table_size(); ++i) {
+  for (int i = 0; i < param_.sparse_table_size(); ++i) {
     uint64_t table_id =
         static_cast<uint64_t>(param_.sparse_table(i).table_id());
     TableParameter table = param_.sparse_table(i);
     sparse_key_names_[table_id].resize(table.sparse_key_name_size());
-    for (size_t j = 0; j < table.sparse_key_name_size(); ++j) {
+    for (int j = 0; j < table.sparse_key_name_size(); ++j) {
       sparse_key_names_[table_id][j] = table.sparse_key_name(j);
     }
     sparse_value_names_[table_id].resize(table.sparse_value_name_size());
-    for (size_t j = 0; j < table.sparse_value_name_size(); ++j) {
+    for (int j = 0; j < table.sparse_value_name_size(); ++j) {
       sparse_value_names_[table_id][j] = table.sparse_value_name(j);
     }
     sparse_grad_names_[table_id].resize(table.sparse_grad_name_size());
-    for (size_t j = 0; j < table.sparse_grad_name_size(); ++j) {
+    for (int j = 0; j < table.sparse_grad_name_size(); ++j) {
       sparse_grad_names_[table_id][j] = table.sparse_grad_name(j);
     }
     label_var_name_[table_id] = table.label_var_name();
   }
-  for (size_t i = 0; i < param_.dense_table_size(); ++i) {
+  for (int i = 0; i < param_.dense_table_size(); ++i) {
     uint64_t table_id = static_cast<uint64_t>(param_.dense_table(i).table_id());
     auto table = param_.dense_table(i);
     dense_value_names_[table_id].resize(table.dense_value_name_size());
-    for (size_t j = 0; j < table.dense_value_name_size(); ++j) {
+    for (int j = 0; j < table.dense_value_name_size(); ++j) {
       dense_value_names_[table_id][j] = table.dense_value_name(j);
     }
     dense_grad_names_[table_id].resize(table.dense_grad_name_size());
-    for (size_t j = 0; j < table.dense_grad_name_size(); ++j) {
+    for (int j = 0; j < table.dense_grad_name_size(); ++j) {
       dense_grad_names_[table_id][j] = table.dense_grad_name(j);
     }
   }
   skip_ops_.resize(param_.skip_ops_size());
-  for (size_t i = 0; i < param_.skip_ops_size(); ++i) {
+  for (int i = 0; i < param_.skip_ops_size(); ++i) {
     skip_ops_[i] = param_.skip_ops(i);
   }
@@ -83,14 +83,14 @@ void DownpourWorker::CollectLabelInfo(size_t table_idx) {
   LoDTensor* tensor = var->GetMutable<LoDTensor>();
   int64_t* label_ptr = tensor->data<int64_t>();
-  int global_index = 0;
+  size_t global_index = 0;
   for (size_t i = 0; i < sparse_key_names_[table_id].size(); ++i) {
     VLOG(3) << "sparse_key_names_[" << i
             << "]: " << sparse_key_names_[table_id][i];
     Variable* fea_var = thread_scope_->FindVar(sparse_key_names_[table_id][i]);
     LoDTensor* tensor = fea_var->GetMutable<LoDTensor>();
     int64_t* ids = tensor->data<int64_t>();
-    int fea_idx = 0;
+    size_t fea_idx = 0;
     // tensor->lod()[0].size() == batch_size + 1
     for (auto lod_idx = 1u; lod_idx < tensor->lod()[0].size(); ++lod_idx) {
       for (; fea_idx < tensor->lod()[0][lod_idx]; ++fea_idx) {
@@ -138,7 +138,7 @@ void DownpourWorker::FillSparseValue(size_t table_idx) {
   auto& tensor_lod = tensor->lod()[0];
   LoD data_lod{tensor_lod};
   tensor_emb->set_lod(data_lod);
-  for (auto index = 0u; index < len; ++index) {
+  for (int index = 0; index < len; ++index) {
     if (ids[index] == 0u) {
       memcpy(ptr + table.emb_dim() * index, init_value.data() + 2,
              sizeof(float) * table.emb_dim());
@@ -192,7 +192,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
     read_time += timeline.ElapsedSec();
     total_time += timeline.ElapsedSec();
     VLOG(3) << "program config size: " << param_.program_config_size();
-    for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
+    for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
          ++i) {
       uint64_t tid = static_cast<uint64_t>(
           param_.program_config(0).pull_sparse_table_id(i));
@@ -244,8 +244,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
     }
     if (need_to_push_sparse_) {
-      for (size_t i = 0;
-           i < param_.program_config(0).push_sparse_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_sparse_table_id(i));
        TableParameter table;
@@ -268,8 +268,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
     if (need_to_push_dense_) {
       timeline.Start();
-      for (size_t i = 0;
-           i < param_.program_config(0).push_dense_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_dense_table_id(i));
        fleet_ptr_->PushDenseVarsAsync(
@@ -315,8 +315,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
     }
     if (need_to_push_dense_) {
-      for (size_t i = 0;
-           i < param_.program_config(0).push_dense_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_dense_table_id(i));
        pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid);
@@ -362,7 +362,7 @@ void DownpourWorker::TrainFiles() {
   int cur_batch;
   while ((cur_batch = device_reader_->Next()) > 0) {
     // pull sparse here
-    for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
+    for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
         ++i) {
       uint64_t tid = static_cast<uint64_t>(
           param_.program_config(0).pull_sparse_table_id(i));
@@ -397,8 +397,8 @@ void DownpourWorker::TrainFiles() {
     if (need_to_push_sparse_) {
       // push gradients here
-      for (size_t i = 0;
-           i < param_.program_config(0).push_sparse_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_sparse_table_id(i));
        TableParameter table;
@@ -416,8 +416,8 @@ void DownpourWorker::TrainFiles() {
     }
     if (need_to_push_dense_) {
-      for (size_t i = 0;
-           i < param_.program_config(0).push_dense_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_dense_table_id(i));
        fleet_ptr_->PushDenseVarsAsync(
@@ -461,8 +461,8 @@ void DownpourWorker::TrainFiles() {
     }
     if (need_to_push_dense_) {
-      for (size_t i = 0;
-           i < param_.program_config(0).push_dense_table_id_size(); ++i) {
+      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+           ++i) {
        uint64_t tid = static_cast<uint64_t>(
            param_.program_config(0).push_dense_table_id(i));
        pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid);
......
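Note: the loop-index changes above align each counter's type with what its bound returns: protobuf repeated-field `_size()` accessors return `int`, while STL `.size()` and LoD offsets are unsigned, so mixing the two trips -Wsign-compare. A compact illustration of the rule of thumb (the `Config` type is a hypothetical stand-in for the proto message):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for a protobuf message: repeated-field sizes are plain ints.
struct Config {
  std::vector<int> tables;
  int table_size() const { return static_cast<int>(tables.size()); }
};

int main() {
  Config cfg{{3, 1, 4}};
  std::vector<float> values{0.5f, 1.5f};

  // int counter for an int-returning bound (proto _size() accessors)...
  for (int i = 0; i < cfg.table_size(); ++i) std::cout << cfg.tables[i] << ' ';
  std::cout << '\n';

  // ...and size_t for unsigned bounds (STL .size()), keeping -Wsign-compare quiet.
  for (size_t i = 0; i < values.size(); ++i) std::cout << values[i] << ' ';
  std::cout << '\n';
}
```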
@@ -23,7 +23,7 @@ namespace ir {
 void ExpectedKernelCachePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Applies Expected Kernel Cache strategy.";
   for (const Node* n : graph->Nodes()) {
-    if (n->IsOp()) {
+    if (n->IsOp() && n->Op()) {
       n->Op()->SetAttr(kEnableCacheExpectedKernel, true);
     }
   }
......
@@ -31,10 +31,10 @@ namespace paddle {
 namespace framework {
 namespace ir {
 namespace {
-void SortHelper(
-    const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
-    ir::Node *node, std::unordered_set<ir::Node *> *visited,
-    std::vector<ir::Node *> *ret) {
+void SortHelper(const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>,
+                               ir::NodeComp> &adj_list,
+                ir::Node *node, std::unordered_set<ir::Node *> *visited,
+                std::vector<ir::Node *> *ret) {
   visited->insert(node);
   for (auto adj : adj_list.at(node)) {
@@ -50,7 +50,8 @@ void SortHelper(
 bool HasCircleHelper(
     ir::Node *node,
-    const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
+    const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+        &adj_list,
     std::unordered_set<ir::Node *> *visited,
     std::unordered_set<ir::Node *> *in_trace,
     std::vector<std::vector<ir::Node *>> *circles) {
@@ -84,7 +85,8 @@ bool HasCircleHelper(
 }
 bool HasCircleInternal(
-    const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
+    const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+        &adj_list,
     std::vector<std::vector<ir::Node *>> *circles) {
   std::unordered_set<ir::Node *> visited;
   std::unordered_set<ir::Node *> in_trace;
@@ -107,8 +109,8 @@ bool FindCircleSubGraph(const Graph &graph,
 }
 std::vector<ir::Node *> TopologySortOperations(const Graph &graph) {
-  std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list =
-      BuildOperationAdjList(graph);
+  std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+      adj_list = BuildOperationAdjList(graph);
   PADDLE_ENFORCE(!HasCircleInternal(adj_list, nullptr));
   std::unordered_set<ir::Node *> visited;
   std::vector<ir::Node *> ret;
@@ -117,34 +119,30 @@ std::vector<ir::Node *> TopologySortOperations(const Graph &graph) {
       SortHelper(adj_list, adj.first, &visited, &ret);
     }
   }
   return ret;
 }
 // Build operator inlink edge table.
-std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
-    const Graph &graph) {
-  std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list;
+std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+BuildOperationAdjList(const Graph &graph) {
+  std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+      adj_list;
   for (auto &n : graph.Nodes()) {
     if (!n->IsOp()) continue;
     if (adj_list.find(n) == adj_list.end()) {
-      adj_list[n] = std::unordered_set<ir::Node *>();
+      adj_list[n] = std::set<ir::Node *, ir::NodeComp>();
     }
-    std::vector<ir::Node *> nodes;
     for (auto &var : n->inputs) {
       for (auto &adj_n : var->inputs) {
         PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
         VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
                 << " -> " << n->Name() << reinterpret_cast<void *>(n)
                 << " via " << var->Name() << reinterpret_cast<void *>(var);
-        nodes.push_back(adj_n);
+        adj_list[n].insert(adj_n);
       }
     }
-    std::sort(nodes.begin(), nodes.end(), [](ir::Node *node1, ir::Node *node2) {
-      return node1->id() > node2->id();
-    });
-    adj_list[n].insert(std::make_move_iterator(nodes.begin()),
-                       std::make_move_iterator(nodes.end()));
   }
   return adj_list;
 }
......
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
+#include <set>
 #include <vector>
 #include "paddle/fluid/framework/ir/graph.h"
@@ -25,6 +26,13 @@ namespace paddle {
 namespace framework {
 namespace ir {
+// Compare nodes via node id.
+struct NodeComp {
+  bool operator()(ir::Node *const &node1, ir::Node *const &node2) const {
+    return node1->id() < node2->id();
+  }
+};
 // Test if the graph contains circle.
 bool HasCircle(const Graph &graph);
@@ -57,8 +65,8 @@ std::vector<Node *> TopologyVarientSort(const Graph &graph, SortKind sort_kind);
 void CleanIndividualNodes(Graph *graph);
 // Build an adjacency list of operations for the `graph`.
-std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
-    const Graph &graph);
+std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
+BuildOperationAdjList(const Graph &graph);
 template <typename T>
 std::vector<T *> FilterByNodeWrapper(const Graph &graph) {
......
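Note: replacing `std::unordered_set` with a `std::set` ordered by `NodeComp` makes adjacency iteration, and therefore `TopologySortOperations`, deterministic across runs, without the sort-then-insert step the old code needed. A self-contained sketch of the idea with a trimmed-down `Node`:

```cpp
#include <iostream>
#include <set>

struct Node {
  int id;
  const char* name;
};

// Same role as ir::NodeComp: order nodes by their stable integer id.
struct NodeComp {
  bool operator()(const Node* a, const Node* b) const { return a->id < b->id; }
};

int main() {
  Node relu{2, "relu"}, mul{0, "mul"}, add{1, "add"};

  // Ordered adjacency set: iteration follows node ids, not hash order.
  std::set<const Node*, NodeComp> adj;
  adj.insert(&relu);
  adj.insert(&mul);
  adj.insert(&add);

  for (const Node* n : adj) std::cout << n->name << ' ';  // mul add relu
  std::cout << '\n';
}
```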
@@ -241,6 +241,7 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
   outputs_ = outputs;
   attrs_ = attrs;
   need_update_ = true;
+  block_ = nullptr;
 }
 OpDesc::OpDesc(const OpDesc &other, BlockDesc *block) {
......
@@ -880,7 +880,16 @@ std::vector<KernelConfig>* OperatorWithKernel::GetKernelConfig(
 void OperatorWithKernel::RunImpl(const Scope& scope,
                                  const platform::Place& place) const {
-  if (!HasAttr(kEnableCacheRuntimeContext)) {
+  // To reduce the elapsed time of HasAttr, we use bool variable to record the
+  // result of HasAttr.
+  if (!enable_cache_runtime_context && HasAttr(kEnableCacheRuntimeContext))
+    enable_cache_runtime_context = true;
+  if (!enable_cache_expected_kernel && HasAttr(kEnableCacheExpectedKernel))
+    enable_cache_expected_kernel = true;
+  if (!all_kernels_must_compute_runtime_shape &&
+      HasAttr(kAllKernelsMustComputeRuntimeShape))
+    all_kernels_must_compute_runtime_shape = true;
+  if (!enable_cache_runtime_context) {
     RuntimeContext ctx(Inputs(), Outputs(), scope);
     RunImpl(scope, place, &ctx);
   } else {
@@ -899,7 +908,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
-  if (!HasAttr(kEnableCacheExpectedKernel) || !kernel_type_) {
+  if (!enable_cache_expected_kernel || !kernel_type_) {
     ChooseKernel(*runtime_ctx, scope, place);
   }
@@ -918,7 +927,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(kernel_type_->place_);
   }
-  if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
+  if (!all_kernels_must_compute_runtime_shape) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
......
@@ -506,6 +506,9 @@ class OperatorWithKernel : public OperatorBase {
   mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
   mutable const Scope* pre_scope_ = nullptr;
+  mutable bool enable_cache_runtime_context = false;
+  mutable bool enable_cache_expected_kernel = false;
+  mutable bool all_kernels_must_compute_runtime_shape = false;
 };
 extern bool OpSupportGPU(const std::string& op_type);
......
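Note: the three `mutable` booleans memoize `HasAttr`, which otherwise performs a string lookup in the attribute map on every `RunImpl` call; once an attribute is seen, the flag stays latched. A stripped-down sketch of the latch (hypothetical `Op` class, not the real `OperatorWithKernel`):

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

class Op {
 public:
  explicit Op(std::map<std::string, bool> attrs) : attrs_(std::move(attrs)) {}

  void Run() const {
    // Latch the (relatively costly) attribute lookup into a mutable flag;
    // after the first hit, Run() never consults the map again.
    if (!cache_runtime_ctx_ && HasAttr("enable_cache_runtime_context"))
      cache_runtime_ctx_ = true;
    std::cout << (cache_runtime_ctx_ ? "cached path" : "fresh context") << '\n';
  }

 private:
  bool HasAttr(const std::string& name) const { return attrs_.count(name) > 0; }

  std::map<std::string, bool> attrs_;
  mutable bool cache_runtime_ctx_ = false;  // mirrors enable_cache_runtime_context
};

int main() {
  Op op({{"enable_cache_runtime_context", true}});
  op.Run();  // looks up the attr, latches the flag
  op.Run();  // uses the latched flag only
}
```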
@@ -221,7 +221,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
     PADDLE_ENFORCE(!member_->use_cuda_,
                    "gpu mode does not support async_mode_ now!");
     graphs.push_back(graph);
-    for (int i = 1; i < places.size(); ++i) {
+    for (size_t i = 1; i < places.size(); ++i) {
       auto *tmp_graph = new ir::Graph(graph->OriginProgram());
       async_graphs_.emplace_back(tmp_graph);
       graphs.push_back(tmp_graph);
@@ -315,7 +315,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
     graph = build_strategy.Apply(graph, {member_->places_[0]}, loss_var_name,
                                  {member_->local_scopes_[0]}, 1,
                                  member_->use_cuda_, member_->nccl_ctxs_.get());
-    for (int i = 1; i < member_->places_.size(); ++i) {
+    for (size_t i = 1; i < member_->places_.size(); ++i) {
       graphs[i] =
           build_strategy.Apply(graphs[i], {member_->places_[i]}, loss_var_name,
                                {member_->local_scopes_[i]}, 1,
......
@@ -76,7 +76,7 @@ message PullDenseWorkerParameter {
 message TableParameter {
   // dense table only
-  optional int64 table_id = 1;
+  optional uint64 table_id = 1;
   repeated string dense_value_name = 2;
   repeated string dense_grad_name = 3;
   repeated int32 push_dense_wait_times = 5;
......
@@ -259,6 +259,9 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       return false;
     }
+    PADDLE_ENFORCE_NOT_NULL(input_ptr);
+    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
     if (platform::is_cpu_place(place_)) {
       // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
       std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......
@@ -54,6 +54,7 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
     memory_owned_ = other.memory_owned_;
   } else {
     Resize(other.length());
+    PADDLE_ENFORCE(!(other.length() > 0 && other.data() == nullptr));
     memcpy(data_, other.data(), other.length());
     length_ = other.length();
     memory_owned_ = true;
......
@@ -169,6 +169,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
   std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
   // Hot fix the bug that result diff in multi-thread.
   // TODO(Superjomn) re-implement a real clone here.
+  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
   if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
     LOG(ERROR) << "fail to call Init";
     return nullptr;
@@ -210,6 +211,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       return false;
     }
+    PADDLE_ENFORCE_NOT_NULL(input_ptr);
+    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
     if (platform::is_cpu_place(place_)) {
       // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
       std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
@@ -316,6 +319,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   }
   std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
+  PADDLE_ENFORCE_NOT_NULL(
+      dynamic_cast<NativePaddlePredictor *>(predictor.get()));
   if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
     return nullptr;
   }
......
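Both predictor paths funnel through a dynamic_cast before calling Init; since a failed pointer dynamic_cast yields nullptr, the added PADDLE_ENFORCE_NOT_NULL turns a type mismatch into an immediate, named failure instead of a null dereference one line later. A reduced sketch of the pattern (hypothetical types):

#include <cassert>
#include <memory>

struct PredictorBase { virtual ~PredictorBase() = default; };
struct NativePredictor : PredictorBase {
  bool Init() { return true; }
};

bool InitNative(const std::unique_ptr<PredictorBase>& p) {
  auto* native = dynamic_cast<NativePredictor*>(p.get());
  assert(native != nullptr && "predictor is not the expected concrete type");
  return native->Init();
}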
...@@ -123,8 +123,8 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { ...@@ -123,8 +123,8 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// will enhance this pass later. // will enhance this pass later.
"runtime_context_cache_pass", // "runtime_context_cache_pass", //
"attention_lstm_fuse_pass", // "attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", // "seqconv_eltadd_relu_fuse_pass", //
// "seqpool_concat_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", // // "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", // "fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", // "mul_lstm_fuse_pass", //
......
...@@ -47,6 +47,7 @@ struct DataRecord { ...@@ -47,6 +47,7 @@ struct DataRecord {
num_lines++; num_lines++;
std::vector<std::string> data; std::vector<std::string> data;
split(line, '\t', &data); split(line, '\t', &data);
PADDLE_ENFORCE(data.size() >= 4);
// load title1 data // load title1 data
std::vector<int64_t> title1_data; std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data); split_to_int64(data[0], ' ', &title1_data);
......
...@@ -150,6 +150,9 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) { ...@@ -150,6 +150,9 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
if (use_mkldnn) { if (use_mkldnn) {
cfg->EnableMKLDNN(); cfg->EnableMKLDNN();
} }
// Enable seqpool_concat_fuse_pass, disabled by default since it takes too
// much time
cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass");
} }
void profile(bool use_mkldnn = false) { void profile(bool use_mkldnn = false) {
......
...@@ -214,28 +214,23 @@ TEST(Analyzer_Transformer, fuse_statis) { ...@@ -214,28 +214,23 @@ TEST(Analyzer_Transformer, fuse_statis) {
} }
// Compare result of NativeConfig and AnalysisConfig // Compare result of NativeConfig and AnalysisConfig
// void compare(bool use_mkldnn = false) { void compare(bool use_mkldnn = false) {
// AnalysisConfig cfg; AnalysisConfig cfg;
// SetConfig(&cfg); SetConfig(&cfg);
// if (use_mkldnn) { if (use_mkldnn) {
// cfg.EnableMKLDNN(); cfg.EnableMKLDNN();
// } }
//
// std::vector<std::vector<PaddleTensor>> input_slots_all; std::vector<std::vector<PaddleTensor>> input_slots_all;
// SetInput(&input_slots_all); SetInput(&input_slots_all);
// CompareNativeAndAnalysis( CompareNativeAndAnalysis(
// reinterpret_cast<const PaddlePredictor::Config *>(&cfg), reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
// input_slots_all); }
// }
TEST(Analyzer_Transformer, compare) { compare(); }
// TODO(yihuaxu): #ifdef PADDLE_WITH_MKLDNN
// Disable compare and compare_mkldnn temporary, see TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); }
// https://github.com/paddlePaddle/Paddle/issues/16316 for details. #endif
// TEST(Analyzer_Transformer, compare) { compare(); }
// #ifdef PADDLE_WITH_MKLDNN
// TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */);
// }
// #endif
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
abs
acos
asin
atan
attention_lstm attention_lstm
brelu
conv_shift conv_shift
cos
cos_sim cos_sim
dequantize dequantize
elu
fc fc
flatten flatten
fsp fsp
...@@ -21,14 +14,8 @@ fusion_seqconv_eltadd_relu ...@@ -21,14 +14,8 @@ fusion_seqconv_eltadd_relu
fusion_seqexpand_concat_fc fusion_seqexpand_concat_fc
fusion_seqpool_concat fusion_seqpool_concat
fusion_squared_mat_sub fusion_squared_mat_sub
gelu
gru gru
hard_shrink
hierarchical_sigmoid hierarchical_sigmoid
leaky_relu
log
logsigmoid
lookup_table
lrn lrn
lstm_unit lstm_unit
lstmp lstmp
...@@ -39,10 +26,11 @@ modified_huber_loss ...@@ -39,10 +26,11 @@ modified_huber_loss
nce nce
pool2d pool2d
pool3d pool3d
pow
prelu prelu
quantize quantize
rank_loss rank_loss
reduce_all
reduce_any
reduce_max reduce_max
reduce_mean reduce_mean
reduce_min reduce_min
...@@ -51,26 +39,10 @@ reduce_sum ...@@ -51,26 +39,10 @@ reduce_sum
requantize requantize
reshape reshape
rnn_memory_helper rnn_memory_helper
round
sequence_softmax sequence_softmax
sin
softplus
softshrink
softsign
space_to_depth
spp spp
square
squared_l2_distance
squared_l2_norm
squeeze squeeze
stanh
swish
tanh_shrink
teacher_student_sigmoid_loss
tensor_array_to_tensor tensor_array_to_tensor
thresholded_relu
transpose transpose
tree_conv
unpool unpool
unsqueeze unsqueeze
warpctc
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h" #include "paddle/fluid/platform/cudnn_desc.h"
...@@ -82,6 +85,8 @@ template <typename T> ...@@ -82,6 +85,8 @@ template <typename T>
struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> { struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx) explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename T> template <typename T>
...@@ -94,6 +99,8 @@ struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> { ...@@ -94,6 +99,8 @@ struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx) explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) { : CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename T> template <typename T>
...@@ -105,6 +112,8 @@ template <typename T> ...@@ -105,6 +112,8 @@ template <typename T>
struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> { struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx) explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename T> template <typename T>
...@@ -116,6 +125,8 @@ template <typename T> ...@@ -116,6 +125,8 @@ template <typename T>
struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> { struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx) explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename Functor> template <typename Functor>
...@@ -140,10 +151,13 @@ class CudnnActivationGradKernel ...@@ -140,10 +151,13 @@ class CudnnActivationGradKernel
public: public:
using T = typename Functor::ELEMENT_TYPE; using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
static_assert(Functor::FwdDeps() == kDepOut, "Forward deps must be Out.");
const framework::Tensor *X, *Out, *dOut; const framework::Tensor *X, *Out, *dOut;
X = Out = dOut = nullptr; X = Out = dOut = nullptr;
framework::Tensor* dX = nullptr; framework::Tensor* dX = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX); ExtractActivationGradTensor<Functor::FwdDeps()>(context, &X, &Out, &dOut,
&dX);
dX->mutable_data<T>(context.GetPlace()); dX->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>(); auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx); Functor functor(dev_ctx);
......
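Every cuDNN grad functor above now reports FwdDeps() == kDepOut, and the grad kernel pins that down with a static_assert before instantiating ExtractActivationGradTensor<Functor::FwdDeps()>; cuDNN's activation-backward entry points consume the forward output, so a functor that needed X would be a silent misuse. A compile-time sketch of gating on the trait (placeholder names):

enum ActBwdOpFwdDeps { kNoDeps = 0x00, kDepX = 0x01, kDepOut = 0x02 };

struct CudnnReluGrad {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

template <typename Functor>
void ComputeGrad() {
  // Rejected at compile time if the functor needs the forward input X.
  static_assert(Functor::FwdDeps() == kDepOut, "Forward deps must be Out.");
}

template void ComputeGrad<CudnnReluGrad>();  // instantiates the check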
...@@ -15,7 +15,9 @@ limitations under the License. */ ...@@ -15,7 +15,9 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
#include <memory> #include <memory>
#include <string> #include <string>
#include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <vector>
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h" #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -27,6 +29,25 @@ namespace operators { ...@@ -27,6 +29,25 @@ namespace operators {
using paddle::framework::Tensor; using paddle::framework::Tensor;
template <typename GradFunctor>
static constexpr bool CanInplaceAct() {
return GradFunctor::FwdDeps() == kDepOut || GradFunctor::FwdDeps() == kNoDeps;
}
std::unique_ptr<std::unordered_set<std::string>> GetInplaceOpSet() {
std::unique_ptr<std::unordered_set<std::string>> ret(
new std::unordered_set<std::string>());
#define INSERT_INTO_INPLACE_OP_SET(op_type, __omitted, fwd_functor, \
bwd_functor) \
if (CanInplaceAct<bwd_functor<float>>()) { \
ret->insert(#op_type); \
}
FOR_EACH_ACTIVATION_OP(INSERT_INTO_INPLACE_OP_SET);
#undef INSERT_INTO_INPLACE_OP_SET
return ret;
}
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \ class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \ : public ::paddle::framework::OpProtoAndCheckerMaker { \
...@@ -50,26 +71,32 @@ using paddle::framework::Tensor; ...@@ -50,26 +71,32 @@ using paddle::framework::Tensor;
} \ } \
} }
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \ template <ActBwdOpFwdDeps kDepValue>
class OP_NAME##GradMaker \ class ActivationGradOpDescMaker : public framework::SingleGradOpDescMaker {
: public ::paddle::framework::SingleGradOpDescMaker { \ public:
public: \ using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker; \
\ protected:
protected: \ std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<::paddle::framework::OpDesc> Apply() const override { \ std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
auto* op = new ::paddle::framework::OpDesc(); \ op->SetType(ForwardOpType() + "_grad");
op->SetType(#KERNEL_TYPE "_grad"); \ op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("Out", Output("Out")); \ op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetInput(::paddle::framework::GradVarName("Out"), \ op->SetAttrMap(Attrs());
OutputGrad("Out")); \
\ if (static_cast<int>(kDepValue) &
op->SetAttrMap(Attrs()); \ static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
\ op->SetInput("X", Input("X"));
op->SetOutput(::paddle::framework::GradVarName("X"), InputGrad("X")); \ }
return std::unique_ptr<::paddle::framework::OpDesc>(op); \
} \ if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
op->SetInput("Out", Output("Out"));
}
return op;
} }
};
framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel& oper, const framework::OperatorWithKernel& oper,
...@@ -129,14 +156,15 @@ class ActivationOpGrad : public framework::OperatorWithKernel { ...@@ -129,14 +156,15 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
ctx->ShareDim("Out", framework::GradVarName("X")); auto out_grad_name = framework::GradVarName("Out");
ctx->ShareLoD("Out", framework::GradVarName("X")); ctx->ShareDim(out_grad_name, framework::GradVarName("X"));
ctx->ShareLoD(out_grad_name, framework::GradVarName("X"));
} }
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "Out"); return GetKernelType(ctx, *this, framework::GradVarName("Out"));
} }
}; };
...@@ -558,79 +586,27 @@ REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc); ...@@ -558,79 +586,27 @@ REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc); REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc); REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc); REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Sigmoid, sigmoid);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu, relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Gelu, gelu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Exp, exp);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Tanh, tanh);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Ceil, ceil);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Floor, floor);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Sqrt, sqrt);
REGISTER_ACTIVATION_OP_GRAD_MAKER(SoftRelu, soft_relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu6, relu6);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Reciprocal, reciprocal);
REGISTER_ACTIVATION_OP_GRAD_MAKER(HardSigmoid, hard_sigmoid);
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
#define FOR_EACH_INPLACE_OP_FUNCTOR(__macro) \ #define REGISTER_ACTIVATION_OP(KERNEL_TYPE, OP_NAME, functor, grad_functor) \
__macro(Sigmoid, sigmoid); \ REGISTER_OPERATOR( \
__macro(Relu, relu); \ KERNEL_TYPE, ops::ActivationOp, ops::OP_NAME##OpMaker, \
__macro(Exp, exp); \ ops::ActivationOpInferVarType, \
__macro(Tanh, tanh); \ ops::ActivationGradOpDescMaker<ops::grad_functor<float>::FwdDeps()>, \
__macro(Ceil, ceil); \ std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(), \
__macro(Floor, floor); \ ::paddle::framework::SingleOpInplaceInToOut, \
__macro(Sqrt, sqrt); \ void>::type); \
__macro(SoftRelu, soft_relu); \ REGISTER_OPERATOR( \
__macro(Relu6, relu6); \ KERNEL_TYPE##_grad, ops::ActivationOpGrad, \
__macro(Reciprocal, reciprocal); \ std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(), \
__macro(HardSigmoid, hard_sigmoid); ::paddle::framework::SingleOpInplaceInToOut, \
void>::type)
#define FOR_EACH_OP_FUNCTOR(__macro) \
__macro(LogSigmoid, logsigmoid); \ #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name, functor, \
__macro(SoftShrink, softshrink); \ grad_functor) \
__macro(Abs, abs); \
__macro(Cos, cos); \
__macro(Acos, acos); \
__macro(Sin, sin); \
__macro(Asin, asin); \
__macro(Atan, atan); \
__macro(Round, round); \
__macro(Log, log); \
__macro(Square, square); \
__macro(Gelu, gelu); \
__macro(BRelu, brelu); \
__macro(Pow, pow); \
__macro(STanh, stanh); \
__macro(Softplus, softplus); \
__macro(Softsign, softsign); \
__macro(LeakyRelu, leaky_relu); \
__macro(TanhShrink, tanh_shrink); \
__macro(ELU, elu); \
__macro(HardShrink, hard_shrink); \
__macro(Swish, swish); \
__macro(ThresholdedRelu, thresholded_relu);
#define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::operators::OP_NAME##GradMaker, \
::paddle::framework::SingleOpInplaceInToOut); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad, \
::paddle::framework::SingleOpInplaceInToOut)
#define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \ REGISTER_OP_CPU_KERNEL( \
act_type, ops::ActivationKernel<paddle::platform::CPUDeviceContext, \ act_type, ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
ops::functor<float>>, \ ops::functor<float>>, \
...@@ -643,6 +619,5 @@ namespace ops = paddle::operators; ...@@ -643,6 +619,5 @@ namespace ops = paddle::operators;
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \ ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
ops::grad_functor<double>>); ops::grad_functor<double>>);
FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_INPLACE_OP_FUNCTOR(REGISTER_INPLACE_ACTIVATION_OP); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
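The rewritten registration picks its inplace-inference class at compile time: when the grad functor can live without the forward input X (CanInplaceAct), REGISTER_ACTIVATION_OP plugs in SingleOpInplaceInToOut, otherwise void (no inplace pass). A reduced sketch of that std::conditional dispatch (placeholder types, not the framework's real registry):

#include <type_traits>

enum ActBwdOpFwdDeps { kNoDeps = 0x00, kDepX = 0x01, kDepOut = 0x02 };

template <typename GradFunctor>
constexpr bool CanInplaceAct() {
  return GradFunctor::FwdDeps() == kDepOut ||
         GradFunctor::FwdDeps() == kNoDeps;
}

struct SingleOpInplaceInToOut {};  // stand-in for the framework class

struct ReluGrad { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } };
struct GeluGrad { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } };

// relu's gradient only reads Out, so relu may run inplace; gelu needs X.
using ReluInplace = std::conditional<CanInplaceAct<ReluGrad>(),
                                     SingleOpInplaceInToOut, void>::type;
using GeluInplace = std::conditional<CanInplaceAct<GeluGrad>(),
                                     SingleOpInplaceInToOut, void>::type;

static_assert(std::is_same<ReluInplace, SingleOpInplaceInToOut>::value,
              "relu grad depends only on Out");
static_assert(std::is_same<GeluInplace, void>::value,
              "gelu grad still needs X");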
...@@ -15,7 +15,8 @@ limitations under the License. */ ...@@ -15,7 +15,8 @@ limitations under the License. */
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor, \
grad_functor) \
REGISTER_OP_CUDA_KERNEL( \ REGISTER_OP_CUDA_KERNEL( \
act_type, \ act_type, \
ops::ActivationKernel<plat::CUDADeviceContext, ops::functor<float>>, \ ops::ActivationKernel<plat::CUDADeviceContext, ops::functor<float>>, \
...@@ -30,4 +31,4 @@ namespace plat = paddle::platform; ...@@ -30,4 +31,4 @@ namespace plat = paddle::platform;
ops::ActivationGradKernel<plat::CUDADeviceContext, \ ops::ActivationGradKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::float16>>); ops::grad_functor<plat::float16>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CUDA_KERNEL); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
...@@ -12,6 +12,7 @@ limitations under the License. */ ...@@ -12,6 +12,7 @@ limitations under the License. */
#pragma once #pragma once
#include <glog/logging.h> #include <glog/logging.h>
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>
...@@ -35,21 +36,29 @@ limitations under the License. */ ...@@ -35,21 +36,29 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
/* Use ugly global variable, for the using in python layer side enum ActBwdOpFwdDeps {
Please refer to the layer_helper.py and get the details. kNoDeps = 0x00, // Do not need any forward input/output
*/ kDepX = 0x01, // Only need forward input X
static std::unordered_set<std::string> InplaceOpSet = { kDepOut = 0x02, // Only need forward output Out
"sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
"floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid"}; // Never add kDepXOut, because Out can be always calculated
// by forward input X in backward part.
// FIXME(zjl): but in MKLDNN abs, both X and Out are needed...
// Developers should not rely on this enum value!
kDepXOut = 0x03
};
std::unique_ptr<std::unordered_set<std::string>> GetInplaceOpSet();
static bool IsInplace(const std::string& op) { static bool IsInplace(const std::string& op) {
bool inplace = InplaceOpSet.count(op); static auto InplaceOpSet = GetInplaceOpSet();
bool inplace = InplaceOpSet->count(op);
// for op_grad // for op_grad
const int kGradSuffixLen = 4; const int kGradSuffixLen = 4;
if (op.size() > kGradSuffixLen && if (op.size() > kGradSuffixLen &&
op.compare(op.size() - kGradSuffixLen - 1, kGradSuffixLen, "grad")) { op.compare(op.size() - kGradSuffixLen - 1, kGradSuffixLen, "grad")) {
inplace = inplace =
InplaceOpSet.count(op.substr(0, op.size() - (kGradSuffixLen + 1))); InplaceOpSet->count(op.substr(0, op.size() - (kGradSuffixLen + 1)));
} }
return inplace; return inplace;
} }
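Two things changed here: the inplace set is no longer a hand-written global but is built on first use from the functors themselves (static auto InplaceOpSet = GetInplaceOpSet(); is a thread-safe C++11 local static), and kDepX/kDepOut are bit flags that callers test with static_cast<int>(kDepValue) & static_cast<int>(kDepX). One caveat in IsInplace as written: std::string::compare returns 0 on a match, so the "grad" test reads inverted, and the start offset sits one character left of the actual suffix. A clearer, hypothetical form of the suffix handling (not what this commit ships):

#include <string>

// Returns the base op name if `op` ends with "_grad", else `op` itself.
inline std::string BaseOpName(const std::string& op) {
  const std::string kSuffix = "_grad";
  if (op.size() > kSuffix.size() &&
      op.compare(op.size() - kSuffix.size(), kSuffix.size(), kSuffix) == 0) {
    return op.substr(0, op.size() - kSuffix.size());
  }
  return op;
}

With such a helper, IsInplace would reduce to InplaceOpSet->count(BaseOpName(op)) > 0.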
...@@ -85,16 +94,21 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, ...@@ -85,16 +94,21 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context,
context.op().Output("Out")); context.op().Output("Out"));
} }
template <ActBwdOpFwdDeps kDepValue>
inline void ExtractActivationGradTensor( inline void ExtractActivationGradTensor(
const framework::ExecutionContext& context, const framework::Tensor** X, const framework::ExecutionContext& context, const framework::Tensor** X,
const framework::Tensor** Out, const framework::Tensor** dOut, const framework::Tensor** Out, const framework::Tensor** dOut,
framework::Tensor** dX) { framework::Tensor** dX) {
auto out_var = context.InputVar("Out");
auto out_grad_var = context.InputVar(framework::GradVarName("Out")); auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
auto x_grad_var = context.OutputVar(framework::GradVarName("X")); auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
PADDLE_ENFORCE(out_var != nullptr, const framework::Variable* out_var = nullptr;
"Cannot get input Variable Out, variable name = %s",
context.op().Input("Out")); if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) {
out_var = context.InputVar("Out");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
context.op().Input("Out"));
}
PADDLE_ENFORCE(out_grad_var != nullptr, PADDLE_ENFORCE(out_grad_var != nullptr,
"Cannot get input Variable %s, variable name = %s", "Cannot get input Variable %s, variable name = %s",
framework::GradVarName("Out"), framework::GradVarName("Out"),
...@@ -105,23 +119,36 @@ inline void ExtractActivationGradTensor( ...@@ -105,23 +119,36 @@ inline void ExtractActivationGradTensor(
context.op().Output(framework::GradVarName("X"))); context.op().Output(framework::GradVarName("X")));
if (CanBeUsedBySelectedRows.count(context.op().Type())) { if (CanBeUsedBySelectedRows.count(context.op().Type())) {
*Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
*dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar( *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
*out_grad_var); *out_grad_var);
*dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( *dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
x_grad_var); x_grad_var);
if (out_var) {
*Out =
paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
} else {
*Out = *dOut; // fake out
}
} else { } else {
*Out = context.Input<framework::Tensor>("Out"); *Out = context.Input<framework::Tensor>("Out");
*dOut = context.Input<framework::Tensor>(framework::GradVarName("Out")); *dOut = context.Input<framework::Tensor>(framework::GradVarName("Out"));
*dX = context.Output<framework::Tensor>(framework::GradVarName("X")); *dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
if (out_var) {
*Out = &(out_var->Get<framework::LoDTensor>());
} else {
*Out = *dOut; // fake out
}
} }
PADDLE_ENFORCE(*dX != nullptr, PADDLE_ENFORCE(*dX != nullptr,
"Cannot get output tensor %s, variable name = %s", "Cannot get output tensor %s, variable name = %s",
framework::GradVarName("X"), framework::GradVarName("X"),
context.op().Output(framework::GradVarName("X"))); context.op().Output(framework::GradVarName("X")));
bool inplace = IsInplace(context.op().Type()); if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
if (!inplace) {
auto x_var = context.InputVar("X"); auto x_var = context.InputVar("X");
PADDLE_ENFORCE(x_var != nullptr, PADDLE_ENFORCE(x_var != nullptr,
"Cannot get input tensor X, variable name = %s", "Cannot get input tensor X, variable name = %s",
...@@ -172,7 +199,8 @@ class ActivationGradKernel ...@@ -172,7 +199,8 @@ class ActivationGradKernel
const framework::Tensor *X, *Out, *dOut; const framework::Tensor *X, *Out, *dOut;
framework::Tensor* dX = nullptr; framework::Tensor* dX = nullptr;
X = Out = dOut = nullptr; X = Out = dOut = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX); ExtractActivationGradTensor<Functor::FwdDeps()>(context, &X, &Out, &dOut,
&dX);
dX->mutable_data<T>(context.GetPlace()); dX->mutable_data<T>(context.GetPlace());
auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut)); auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out)); auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
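ExtractActivationGradTensor<kDepValue> now pulls Out (or X) from the execution context only when the matching dep bit is set; when Out is not needed it aliases *Out = *dOut as a "fake out", so the shared kernel body's EigenVector<T>::Flatten(detail::Ref(Out)) still has a live tensor to flatten without forcing the forward output to be retained. A reduced sketch of the idea (hypothetical types):

enum ActBwdOpFwdDeps { kNoDeps = 0x00, kDepX = 0x01, kDepOut = 0x02 };

struct Tensor {};

template <int kDepValue>
void ExtractGradTensors(const Tensor* forward_out, const Tensor* dout,
                        const Tensor** Out, const Tensor** dOut) {
  *dOut = dout;
  if (kDepValue & static_cast<int>(kDepOut)) {
    *Out = forward_out;  // the gradient formula really reads Out
  } else {
    *Out = *dOut;        // fake out: a shape-compatible placeholder only
  }
}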
...@@ -222,6 +250,8 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -222,6 +250,8 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out * (static_cast<T>(1) - out); dx.device(d) = dout * out * (static_cast<T>(1) - out);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// Originally: logsigmoid(x) = -log (1 + exp(-x)) // Originally: logsigmoid(x) = -log (1 + exp(-x))
...@@ -258,6 +288,8 @@ struct LogSigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -258,6 +288,8 @@ struct LogSigmoidGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dx.device(d) =
dout * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp())); dout * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp()));
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// exp(x) = e^x // exp(x) = e^x
...@@ -276,6 +308,8 @@ struct ExpGradFunctor : public BaseActivationFunctor<T> { ...@@ -276,6 +308,8 @@ struct ExpGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out; dx.device(d) = dout * out;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// relu(x) = max(x, 0) // relu(x) = max(x, 0)
...@@ -294,6 +328,8 @@ struct ReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -294,6 +328,8 @@ struct ReluGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>(); dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))) // gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
...@@ -338,6 +374,8 @@ struct GeluGradFunctor : BaseActivationFunctor<T> { ...@@ -338,6 +374,8 @@ struct GeluGradFunctor : BaseActivationFunctor<T> {
(-static_cast<T>(0.5) * x.square()).exp(); (-static_cast<T>(0.5) * x.square()).exp();
dx.device(d) = dout * (first + second); dx.device(d) = dout * (first + second);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) // tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
...@@ -356,6 +394,8 @@ struct TanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -356,6 +394,8 @@ struct TanhGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (static_cast<T>(1) - out * out); dx.device(d) = dout * (static_cast<T>(1) - out * out);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// tanhshrink(x) = x - tanh(x) // tanhshrink(x) = x - tanh(x)
...@@ -375,6 +415,8 @@ struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -375,6 +415,8 @@ struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (x.tanh() * x.tanh()); dx.device(d) = dout * (x.tanh() * x.tanh());
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// tanhshrink(x) = x - tanh(x) // tanhshrink(x) = x - tanh(x)
...@@ -409,6 +451,8 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -409,6 +451,8 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
auto temp2 = (x > static_cast<T>(threshold)).template cast<T>().eval(); auto temp2 = (x > static_cast<T>(threshold)).template cast<T>().eval();
dx.device(d) = dout * (temp1 + temp2).template cast<T>(); dx.device(d) = dout * (temp1 + temp2).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0 // softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0
...@@ -443,6 +487,8 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -443,6 +487,8 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
auto temp2 = (x < -lambdaT).template cast<T>().eval(); auto temp2 = (x < -lambdaT).template cast<T>().eval();
dx.device(d) = dout * (temp1 + temp2).template cast<T>(); dx.device(d) = dout * (temp1 + temp2).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// sqrt(x) = x^(1/2) // sqrt(x) = x^(1/2)
...@@ -461,6 +507,8 @@ struct SqrtGradFunctor : public BaseActivationFunctor<T> { ...@@ -461,6 +507,8 @@ struct SqrtGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0.5) * dout / out; dx.device(d) = static_cast<T>(0.5) * dout / out;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// ceil(x) = ceiling(x) // ceil(x) = ceiling(x)
...@@ -479,6 +527,8 @@ struct ZeroGradFunctor : public BaseActivationFunctor<T> { ...@@ -479,6 +527,8 @@ struct ZeroGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0) / out; dx.device(d) = static_cast<T>(0) / out;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kNoDeps; }
}; };
// floor(x) = flooring(x) // floor(x) = flooring(x)
...@@ -522,6 +572,8 @@ struct CosGradFunctor : public BaseActivationFunctor<T> { ...@@ -522,6 +572,8 @@ struct CosGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = -dout * x.unaryExpr(Sine<T>()); dx.device(d) = -dout * x.unaryExpr(Sine<T>());
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// cosine(x) = cos(x) // cosine(x) = cos(x)
...@@ -541,6 +593,8 @@ struct SinGradFunctor : public BaseActivationFunctor<T> { ...@@ -541,6 +593,8 @@ struct SinGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * x.unaryExpr(Cosine<T>()); dx.device(d) = dout * x.unaryExpr(Cosine<T>());
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// sine(x) = sin(x) // sine(x) = sin(x)
...@@ -582,6 +636,8 @@ struct AcosGradFunctor : public BaseActivationFunctor<T> { ...@@ -582,6 +636,8 @@ struct AcosGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dx.device(d) =
-dout * static_cast<T>(1) / (static_cast<T>(1) - x.square()).sqrt(); -dout * static_cast<T>(1) / (static_cast<T>(1) - x.square()).sqrt();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -614,6 +670,8 @@ struct AsinGradFunctor : public BaseActivationFunctor<T> { ...@@ -614,6 +670,8 @@ struct AsinGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dx.device(d) =
dout * static_cast<T>(1) / (static_cast<T>(1) - x.square()).sqrt(); dout * static_cast<T>(1) / (static_cast<T>(1) - x.square()).sqrt();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -645,6 +703,8 @@ struct AtanGradFunctor : public BaseActivationFunctor<T> { ...@@ -645,6 +703,8 @@ struct AtanGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(1) / (static_cast<T>(1) + x.square()); dx.device(d) = dout * static_cast<T>(1) / (static_cast<T>(1) + x.square());
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// round(x) = [x] // round(x) = [x]
...@@ -672,6 +732,8 @@ struct AbsGradFunctor : public BaseActivationFunctor<T> { ...@@ -672,6 +732,8 @@ struct AbsGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * x.sign(); dx.device(d) = dout * x.sign();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepXOut; }
}; };
// reciprocal(x) = 1 / x // reciprocal(x) = 1 / x
...@@ -690,6 +752,8 @@ struct ReciprocalGradFunctor : public BaseActivationFunctor<T> { ...@@ -690,6 +752,8 @@ struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(-1) * out * out; dx.device(d) = dout * static_cast<T>(-1) * out * out;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// log(x) = natural logarithm of x // log(x) = natural logarithm of x
...@@ -708,6 +772,8 @@ struct LogGradFunctor : public BaseActivationFunctor<T> { ...@@ -708,6 +772,8 @@ struct LogGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (static_cast<T>(1) / x); dx.device(d) = dout * (static_cast<T>(1) / x);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// square(x) = x^2 // square(x) = x^2
...@@ -726,6 +792,8 @@ struct SquareGradFunctor : public BaseActivationFunctor<T> { ...@@ -726,6 +792,8 @@ struct SquareGradFunctor : public BaseActivationFunctor<T> {
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(2) * x; dx.device(d) = dout * static_cast<T>(2) * x;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -760,6 +828,8 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -760,6 +828,8 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
((x > static_cast<T>(t_min)) * (x < static_cast<T>(t_max))) ((x > static_cast<T>(t_min)) * (x < static_cast<T>(t_max)))
.template cast<T>(); .template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// relu6(x) = min(max(0, x), 6) // relu6(x) = min(max(0, x), 6)
...@@ -792,6 +862,8 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> { ...@@ -792,6 +862,8 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
((out > static_cast<T>(0)) * (out < static_cast<T>(threshold))) ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>(); .template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
// softplus(x) = log(1 + exp(x)) // softplus(x) = log(1 + exp(x))
...@@ -821,6 +893,8 @@ struct SoftplusGradFunctor : public BaseActivationFunctor<T> { ...@@ -821,6 +893,8 @@ struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dx.device(d) =
dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp()));
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// softsign(x) = x / (1 + |x|) // softsign(x) = x / (1 + |x|)
...@@ -842,6 +916,8 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> { ...@@ -842,6 +916,8 @@ struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dx.device(d) =
dout * (static_cast<T>(1) / (static_cast<T>(1) + x.abs()).square()); dout * (static_cast<T>(1) / (static_cast<T>(1) + x.abs()).square());
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -872,6 +948,8 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -872,6 +948,8 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval(); auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp; dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename T> template <typename T>
...@@ -901,6 +979,8 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -901,6 +979,8 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
auto temp2 = (x >= static_cast<T>(0)).template cast<T>().eval(); auto temp2 = (x >= static_cast<T>(0)).template cast<T>().eval();
dx.device(d) = dout * (temp1 + temp2).template cast<T>(); dx.device(d) = dout * (temp1 + temp2).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -928,9 +1008,11 @@ struct ELUGradFunctor : public BaseActivationFunctor<T> { ...@@ -928,9 +1008,11 @@ struct ELUGradFunctor : public BaseActivationFunctor<T> {
typename dX> typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (x > static_cast<T>(0)).template cast<T>() + dx.device(d) = dout * (x > static_cast<T>(0)).template cast<T>() +
dout * (out + static_cast<T>(alpha)) * dout * static_cast<T>(alpha) * x.exp() *
(x < static_cast<T>(0)).template cast<T>(); (x < static_cast<T>(0)).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
// FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5198 // FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5198
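The ELU gradient rewrite is algebraically neutral: for x < 0 the forward output is out = alpha * (exp(x) - 1), so out + alpha = alpha * exp(x). Re-expressing the backward pass through x.exp() alone removes the read of Out, matching the new FwdDeps() == kDepX declaration. A small stand-alone check of the identity (not part of the commit):

#include <cassert>
#include <cmath>

int main() {
  const double alpha = 1.5;
  for (double x = -5.0; x < 0.0; x += 0.25) {
    double out = alpha * (std::exp(x) - 1.0);
    double old_form = out + alpha;           // depends on Out
    double new_form = alpha * std::exp(x);   // depends on X only
    assert(std::abs(old_form - new_form) < 1e-12);
  }
  return 0;
}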
...@@ -958,6 +1040,8 @@ struct PowGradFunctor : public BaseActivationFunctor<T> { ...@@ -958,6 +1040,8 @@ struct PowGradFunctor : public BaseActivationFunctor<T> {
dx.device(d) = dout * static_cast<T>(factor) * dx.device(d) = dout * static_cast<T>(factor) *
x.pow(static_cast<T>(factor) - static_cast<T>(1)); x.pow(static_cast<T>(factor) - static_cast<T>(1));
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -991,6 +1075,8 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -991,6 +1075,8 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
auto temp = (a * x).tanh() * (a * x).tanh(); auto temp = (a * x).tanh() * (a * x).tanh();
dx.device(d) = dout * a * b * (static_cast<T>(1) - temp); dx.device(d) = dout * a * b * (static_cast<T>(1) - temp);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -1020,6 +1106,8 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -1020,6 +1106,8 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
auto th = static_cast<T>(threshold); auto th = static_cast<T>(threshold);
dx.device(d) = dout * (x > th).template cast<T>(); dx.device(d) = dout * (x > th).template cast<T>();
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
template <typename T> template <typename T>
...@@ -1053,6 +1141,8 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -1053,6 +1141,8 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
.template cast<T>() * .template cast<T>() *
static_cast<T>(slope); static_cast<T>(slope);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
}; };
template <typename T> template <typename T>
...@@ -1077,49 +1167,54 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> { ...@@ -1077,49 +1167,54 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut, template <typename Device, typename X, typename Out, typename dOut,
typename dX> typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const { void operator()(Device d, X x, Out fake_out, dOut dout, dX dx) const {
auto temp1 = static_cast<T>(1) / auto temp1 = static_cast<T>(1) /
(static_cast<T>(1) + (static_cast<T>(-beta) * x).exp()); (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
auto out = x * temp1;
auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out)); auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out));
dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2); dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2);
} }
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ #define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ __macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor); \
__macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ __macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(exp, ExpFunctor, ExpGradFunctor); \ __macro(exp, Exp, ExpFunctor, ExpGradFunctor); \
__macro(relu, ReluFunctor, ReluGradFunctor); \ __macro(relu, Relu, ReluFunctor, ReluGradFunctor); \
__macro(gelu, GeluFunctor, GeluGradFunctor); \ __macro(gelu, Gelu, GeluFunctor, GeluGradFunctor); \
__macro(tanh, TanhFunctor, TanhGradFunctor); \ __macro(tanh, Tanh, TanhFunctor, TanhGradFunctor); \
__macro(atan, AtanFunctor, AtanGradFunctor); \ __macro(atan, Atan, AtanFunctor, AtanGradFunctor); \
__macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ __macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
__macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor); \
__macro(abs, AbsFunctor, AbsGradFunctor); \ __macro(abs, Abs, AbsFunctor, AbsGradFunctor); \
__macro(ceil, CeilFunctor, ZeroGradFunctor); \ __macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \
__macro(floor, FloorFunctor, ZeroGradFunctor); \ __macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \
__macro(cos, CosFunctor, CosGradFunctor); \ __macro(cos, Cos, CosFunctor, CosGradFunctor); \
__macro(acos, AcosFunctor, AcosGradFunctor); \ __macro(acos, Acos, AcosFunctor, AcosGradFunctor); \
__macro(sin, SinFunctor, SinGradFunctor); \ __macro(sin, Sin, SinFunctor, SinGradFunctor); \
__macro(asin, AsinFunctor, AsinGradFunctor); \ __macro(asin, Asin, AsinFunctor, AsinGradFunctor); \
__macro(round, RoundFunctor, ZeroGradFunctor); \ __macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, LogFunctor, LogGradFunctor); \ __macro(log, Log, LogFunctor, LogGradFunctor); \
__macro(square, SquareFunctor, SquareGradFunctor); \ __macro(square, Square, SquareFunctor, SquareGradFunctor); \
__macro(brelu, BReluFunctor, BReluGradFunctor); \ __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \ __macro(pow, Pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \ __macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ __macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ __macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \ __macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ __macro(leaky_relu, LeakyRelu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ __macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELUFunctor, ELUGradFunctor); \ __macro(elu, ELU, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ __macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
__macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \ __macro(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, \
__macro(swish, SwishFunctor, SwishGradFunctor); \ HardSigmoidGradFunctor); \
__macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); __macro(swish, Swish, SwishFunctor, SwishGradFunctor); \
__macro(thresholded_relu, ThresholdedRelu, ThresholdedReluFunctor, \
ThresholdedReluGradFunctor);
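FOR_EACH_KERNEL_FUNCTOR and the separate inplace lists are collapsed into a single X-macro, FOR_EACH_ACTIVATION_OP, now carrying an extra OpName column; every consumer (op registration, CPU kernels, CUDA kernels, the inplace set) replays the same tuple list with its own __macro, so an activation is added in exactly one place. A toy version of the pattern:

#include <cstdio>

#define FOR_EACH_ACTIVATION_OP(__macro) \
  __macro(relu, Relu)                   \
  __macro(tanh, Tanh)

// One expansion prints op types; another could register kernels, etc.
#define PRINT_OP(op_type, OpName) std::printf(#op_type " -> " #OpName "\n");

int main() {
  FOR_EACH_ACTIVATION_OP(PRINT_OP)
  return 0;
}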
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
**/ **/
#include "paddle/fluid/operators/detection/gpc.h" #include "paddle/fluid/operators/detection/gpc.h"
#include "paddle/fluid/platform/enforce.h"
namespace gpc { namespace gpc {
...@@ -689,6 +690,7 @@ static bbox *create_contour_bboxes(gpc_polygon *p) { ...@@ -689,6 +690,7 @@ static bbox *create_contour_bboxes(gpc_polygon *p) {
gpc_malloc<bbox>(box, p->num_contours * sizeof(bbox), gpc_malloc<bbox>(box, p->num_contours * sizeof(bbox),
const_cast<char *>("Bounding box creation")); const_cast<char *>("Bounding box creation"));
PADDLE_ENFORCE_NOT_NULL(box);
/* Construct contour bounding boxes */ /* Construct contour bounding boxes */
for (c = 0; c < p->num_contours; c++) { for (c = 0; c < p->num_contours; c++) {
...@@ -852,6 +854,7 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) { ...@@ -852,6 +854,7 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
/* Create an extended hole array */ /* Create an extended hole array */
gpc_malloc<int>(extended_hole, (p->num_contours + 1) * sizeof(int), gpc_malloc<int>(extended_hole, (p->num_contours + 1) * sizeof(int),
const_cast<char *>("contour hole addition")); const_cast<char *>("contour hole addition"));
PADDLE_ENFORCE_NOT_NULL(extended_hole);
/* Create an extended contour array */ /* Create an extended contour array */
gpc_malloc<gpc_vertex_list>(extended_contour, gpc_malloc<gpc_vertex_list>(extended_contour,
...@@ -969,6 +972,7 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, ...@@ -969,6 +972,7 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
/* Build scanbeam table from scanbeam tree */ /* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double), gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation")); const_cast<char *>("sbt creation"));
PADDLE_ENFORCE_NOT_NULL(sbt);
build_sbt(&scanbeam, sbt, sbtree); build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0; scanbeam = 0;
free_sbtree(&sbtree); free_sbtree(&sbtree);
...@@ -1604,6 +1608,7 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, ...@@ -1604,6 +1608,7 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
/* Build scanbeam table from scanbeam tree */ /* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double), gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation")); const_cast<char *>("sbt creation"));
PADDLE_ENFORCE_NOT_NULL(sbt);
build_sbt(&scanbeam, sbt, sbtree); build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0; scanbeam = 0;
free_sbtree(&sbtree); free_sbtree(&sbtree);
......
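Each gpc_malloc site gains a PADDLE_ENFORCE_NOT_NULL, so an allocation failure inside the polygon clipper aborts with a message instead of feeding a null pointer into the contour loops. The same guard could be centralized; a minimal sketch with assert standing in for the enforce macro (hypothetical helper, not in the commit):

#include <cassert>
#include <cstddef>
#include <cstdlib>

template <typename T>
T* CheckedAlloc(std::size_t bytes, const char* site) {
  T* p = static_cast<T*>(std::malloc(bytes));
  assert(p != nullptr && "gpc allocation failed");
  (void)site;  // a real error message would name the allocation site
  return p;
}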
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <nccl.h> #include <nccl.h>
#endif #endif
#include <limits> #include <limits>
#include <memory>
#include <thread> // NOLINT #include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/coded_stream.h"
...@@ -104,8 +105,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, ...@@ -104,8 +105,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber,
payload->memory_size()); payload->memory_size());
if (payload->memory_size() >= std::numeric_limits<int>::max()) { if (payload->memory_size() >= std::numeric_limits<int>::max()) {
LOG(FATAL) << "AppendZeroCopy varname:" << name LOG(FATAL) << "FATAL error: varname:" << name
<< ", vlen:" << payload->memory_size(); << ", vlen:" << payload->memory_size()
<< " >= std::numeric_limits<int>::max():"
<< std::numeric_limits<int>::max() << ", so exit!";
} }
// steal reference of tensor data // steal reference of tensor data
::grpc::Slice slices[4]; // metadata, tensor, rows meta, rows ::grpc::Slice slices[4]; // metadata, tensor, rows meta, rows
......
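The serializer writes the tensor payload length into a varlength field and then hands it to gRPC slices, and this path treats the length as int-bounded, hence the hard stop once memory_size() reaches std::numeric_limits<int>::max(); the reworded LOG(FATAL) now spells out the violated bound. The check in isolation (hypothetical function):

#include <cstdint>
#include <limits>
#include <stdexcept>

void CheckPayloadFits(std::uint64_t memory_size) {
  const auto kMax =
      static_cast<std::uint64_t>(std::numeric_limits<int>::max());
  if (memory_size >= kMax) {
    throw std::runtime_error("tensor payload exceeds the int32 wire limit");
  }
}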
...@@ -37,10 +37,19 @@ class InterpolateOp : public framework::OperatorWithKernel { ...@@ -37,10 +37,19 @@ class InterpolateOp : public framework::OperatorWithKernel {
"Interpolation method can only be \"bilinear\" or \"nearest\"."); "Interpolation method can only be \"bilinear\" or \"nearest\".");
auto dim_x = ctx->GetInputDim("X"); // NCHW format auto dim_x = ctx->GetInputDim("X"); // NCHW format
int out_h = ctx->Attrs().Get<int>("out_h");
int out_w = ctx->Attrs().Get<int>("out_w");
PADDLE_ENFORCE_EQ(dim_x.size(), 4, "X's dimension must be 4"); PADDLE_ENFORCE_EQ(dim_x.size(), 4, "X's dimension must be 4");
int out_h, out_w;
float scale = ctx->Attrs().Get<float>("scale");
if (scale > 0) {
// round down
out_h = static_cast<int>(dim_x[2] * scale);
out_w = static_cast<int>(dim_x[3] * scale);
} else {
out_h = ctx->Attrs().Get<int>("out_h");
out_w = ctx->Attrs().Get<int>("out_w");
}
if (ctx->HasInput("OutSize") && ctx->IsRuntime()) { if (ctx->HasInput("OutSize") && ctx->IsRuntime()) {
auto out_size_dim = ctx->GetInputDim("OutSize"); auto out_size_dim = ctx->GetInputDim("OutSize");
PADDLE_ENFORCE_EQ(out_size_dim.size(), 1, PADDLE_ENFORCE_EQ(out_size_dim.size(), 1,
...@@ -77,6 +86,7 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -77,6 +86,7 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("out_h", "output height of interpolate op."); AddAttr<int>("out_h", "output height of interpolate op.");
AddAttr<int>("out_w", "output width of interpolate op."); AddAttr<int>("out_w", "output width of interpolate op.");
AddAttr<float>("scale", "scale factor of interpolate op.").SetDefault(0.);
AddAttr<std::string>("interp_method", AddAttr<std::string>("interp_method",
"(string, default \"bilinear\"), interpolation " "(string, default \"bilinear\"), interpolation "
"method, can be \"bilinear\" for " "method, can be \"bilinear\" for "
......
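At shape-inference time the new scale attribute takes effect only when positive: the output extent is the input extent times scale, truncated toward zero by the static_cast<int> (the "round down" comment), while a non-positive scale (the 0.0 default) falls back to the explicit out_h/out_w attributes, and a runtime OutSize input still overrides both. A quick worked example of the truncation:

#include <cstdio>

int main() {
  int in_h = 13, in_w = 7;
  float scale = 0.6f;
  // static_cast<int> truncates: 13 * 0.6 = 7.8 -> 7, 7 * 0.6 = 4.2 -> 4
  int out_h = static_cast<int>(in_h * scale);
  int out_w = static_cast<int>(in_w * scale);
  std::printf("out = %d x %d\n", out_h, out_w);  // out = 7 x 4
  return 0;
}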
...@@ -192,9 +192,21 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> { ...@@ -192,9 +192,21 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
auto* output = ctx.Output<Tensor>("Out"); auto* output = ctx.Output<Tensor>("Out");
auto* input_data = input->data<T>(); auto* input_data = input->data<T>();
int n = input->dims()[0];
int c = input->dims()[1];
int in_h = input->dims()[2];
int in_w = input->dims()[3];
auto interp_method = ctx.Attr<std::string>("interp_method"); auto interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h"); int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w"); int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = in_h * scale;
out_w = in_w * scale;
}
auto out_size = ctx.Input<Tensor>("OutSize"); auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) { if (out_size != nullptr) {
Tensor sizes; Tensor sizes;
...@@ -207,11 +219,6 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> { ...@@ -207,11 +219,6 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
bool align_corners = ctx.Attr<bool>("align_corners"); bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode"); int align_mode = ctx.Attr<int>("align_mode");
int n = input->dims()[0];
int c = input->dims()[1];
int in_h = input->dims()[2];
int in_w = input->dims()[3];
auto* output_data = auto* output_data =
output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace()); output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());
...@@ -268,14 +275,20 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -268,14 +275,20 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
math::SetConstant<platform::CUDADeviceContext, T> zero; math::SetConstant<platform::CUDADeviceContext, T> zero;
zero(device_ctx, input_grad, static_cast<T>(0.0)); zero(device_ctx, input_grad, static_cast<T>(0.0));
int n = input_grad->dims()[0];
int c = input_grad->dims()[1];
int in_h = input_grad->dims()[2];
int in_w = input_grad->dims()[3];
auto interp_method = ctx.Attr<std::string>("interp_method"); auto interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h"); int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w"); int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = in_h * scale;
out_w = in_w * scale;
}
auto out_size = ctx.Input<Tensor>("OutSize"); auto out_size = ctx.Input<Tensor>("OutSize");
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
if (out_size != nullptr) { if (out_size != nullptr) {
Tensor sizes; Tensor sizes;
framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes); framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes);
...@@ -284,10 +297,8 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -284,10 +297,8 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
out_w = size_data[1]; out_w = size_data[1];
} }
int n = input_grad->dims()[0]; bool align_corners = ctx.Attr<bool>("align_corners");
int c = input_grad->dims()[1]; int align_mode = ctx.Attr<int>("align_mode");
int in_h = input_grad->dims()[2];
int in_w = input_grad->dims()[3];
int in_hw = in_h * in_w; int in_hw = in_h * in_w;
int out_hw = out_h * out_w; int out_hw = out_h * out_w;
......
...@@ -163,9 +163,21 @@ class InterpolateKernel : public framework::OpKernel<T> { ...@@ -163,9 +163,21 @@ class InterpolateKernel : public framework::OpKernel<T> {
auto* input = ctx.Input<Tensor>("X"); auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out"); auto* output = ctx.Output<Tensor>("Out");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
std::string interp_method = ctx.Attr<std::string>("interp_method"); std::string interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h"); int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w"); int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = static_cast<int>(in_h * scale);
out_w = static_cast<int>(in_w * scale);
}
auto out_size = ctx.Input<Tensor>("OutSize"); auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) { if (out_size != nullptr) {
auto out_size_data = out_size->data<int>(); auto out_size_data = out_size->data<int>();
...@@ -175,11 +187,6 @@ class InterpolateKernel : public framework::OpKernel<T> { ...@@ -175,11 +187,6 @@ class InterpolateKernel : public framework::OpKernel<T> {
bool align_corners = ctx.Attr<bool>("align_corners"); bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode"); int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace()); output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());
auto& device_ctx = auto& device_ctx =
ctx.template device_context<platform::CPUDeviceContext>(); ctx.template device_context<platform::CPUDeviceContext>();
...@@ -221,23 +228,31 @@ class InterpolateGradKernel : public framework::OpKernel<T> { ...@@ -221,23 +228,31 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X")); auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out")); auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
std::string interp_method = ctx.Attr<std::string>("interp_method"); std::string interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h"); int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w"); int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = static_cast<int>(in_h * scale);
out_w = static_cast<int>(in_w * scale);
}
auto out_size = ctx.Input<Tensor>("OutSize"); auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) { if (out_size != nullptr) {
auto out_size_data = out_size->data<int>(); auto out_size_data = out_size->data<int>();
out_h = out_size_data[0]; out_h = out_size_data[0];
out_w = out_size_data[1]; out_w = out_size_data[1];
} }
bool align_corners = ctx.Attr<bool>("align_corners"); bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode"); int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
input_grad->mutable_data<T>({n, c, in_h, in_w}, ctx.GetPlace()); input_grad->mutable_data<T>({n, c, in_h, in_w}, ctx.GetPlace());
auto& device_ctx = auto& device_ctx =
ctx.template device_context<platform::CPUDeviceContext>(); ctx.template device_context<platform::CPUDeviceContext>();
......
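Taken together, the interpolate hunks above give the output size three sources with a fixed precedence: an OutSize tensor (when fed) wins, otherwise a positive scale attribute, otherwise the out_h/out_w attributes. A minimal Python sketch of that resolution order, mirroring the kernel code above (the helper name is ours, for illustration only):

    def resolve_out_size(in_h, in_w, out_h, out_w, scale, out_size=None):
        # scale > 0 overrides the out_h/out_w attributes, as in the kernels
        if scale > 0:
            out_h = int(in_h * scale)
            out_w = int(in_w * scale)
        # the OutSize input, when provided, overrides everything else
        if out_size is not None:
            out_h, out_w = out_size
        return out_h, out_w

    # e.g. a 16x20 feature map with scale=2.0 -> (32, 40)
    assert resolve_out_size(16, 20, -1, -1, 2.0) == (32, 40)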
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/linspace_op.h"
namespace paddle {
namespace operators {
class LinspaceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Start"),
"Input(Start) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Stop"),
"Input(Stop) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Num"),
"Input(Num) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(OUt) of LinspaceOp should not be null.");
auto s_dims = ctx->GetInputDim("Start");
PADDLE_ENFORCE((s_dims.size() == 1) && (s_dims[0] == 1),
"The shape of Input(Start) should be [1].");
auto e_dims = ctx->GetInputDim("Stop");
PADDLE_ENFORCE((e_dims.size() == 1) && (e_dims[0] == 1),
"The shape of Input(Stop) should be [1].");
auto num_dims = ctx->GetInputDim("Num");
PADDLE_ENFORCE((num_dims.size() == 1) && (num_dims[0] == 1),
"The shape of Input(Num) should be [1].");
ctx->SetOutputDim("Out", {-1});
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
framework::LibraryType library_{framework::LibraryType::kPlain};
framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
return framework::OpKernelType(
ctx.Input<framework::Tensor>("Start")->type(), ctx.device_context(),
layout_, library_);
}
};
class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Start",
"First entry in the sequence. It is a tensor of shape [1], should "
"be of type float32 or float64.");
AddInput("Stop",
"Last entry in the sequence. It is a tensor of shape [1], should "
"be of type float32 or float64.");
AddInput("Num",
"Number of entry in the sequence. It is a tensor of shape [1], "
"should be of type int32.");
AddOutput("Out", "A sequence of numbers.");
AddComment(R"DOC(
Returns a fixed number of evenly spaced values within a given interval. The first entry is Start and the last entry is Stop. When Num is 1, only Start is returned. This mirrors numpy's linspace function.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(linspace, ops::LinspaceOp, ops::LinspaceOpMaker);
REGISTER_OP_CPU_KERNEL(linspace, ops::CPULinspaceKernel<float>,
ops::CPULinspaceKernel<double>);
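For reference, the DOC above matches numpy.linspace with endpoint=True; a small cross-check of the step formula the kernels below implement (plain numpy, not Paddle API):

    import numpy as np

    def linspace(start, stop, num):
        if num == 1:  # the kernels return just start in this case
            return np.array([start])
        step = (stop - start) / (num - 1)  # the step both kernels compute
        return np.array([start + i * step for i in range(num)])

    assert np.allclose(linspace(0.0, 1.0, 5), np.linspace(0.0, 1.0, 5))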
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/linspace_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace paddle {
namespace operators {
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
template <typename T>
__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) {
CUDA_1D_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}
template <typename T>
__global__ void LinspaceSpecialKernel(T start, T* out) {
out[0] = start;
}
template <typename T>
class CUDALinspaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* start_t = context.Input<framework::Tensor>("Start");
auto* stop_t = context.Input<framework::Tensor>("Stop");
auto* num_t = context.Input<framework::Tensor>("Num");
auto* out = context.Output<framework::Tensor>("Out");
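// Start/Stop/Num live on the device; stage each scalar through a CPU
// tensor so its value can be read on the host before the launch.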
framework::Tensor n;
framework::TensorCopy(*start_t, platform::CPUPlace(), &n);
T start = n.data<T>()[0];
framework::TensorCopy(*stop_t, platform::CPUPlace(), &n);
T stop = n.data<T>()[0];
framework::TensorCopy(*num_t, platform::CPUPlace(), &n);
int32_t num = n.data<int32_t>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
T step = 0;
if (num != 1) {
step = (stop - start) / (num - 1);
}
auto stream = context.cuda_device_context().stream();
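// One thread per element: grid is the ceiling of num / block, so all
// num outputs are covered.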
int block = 512;
int grid = (num + block - 1) / block;
LinspaceKernel<T><<<grid, block, 0, stream>>>(start, step, num, out_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(linspace, ops::CUDALinspaceKernel<float>,
ops::CUDALinspaceKernel<double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
template <typename T>
class CPULinspaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
T start = context.Input<framework::Tensor>("Start")->data<T>()[0];
T stop = context.Input<framework::Tensor>("Stop")->data<T>()[0];
int32_t num = context.Input<framework::Tensor>("Num")->data<int32_t>()[0];
auto* out = context.Output<framework::Tensor>("Out");
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
if (num > 1) {
T step = (stop - start) / (num - 1);
T value = start;
for (int i = 0; i < num; ++i) {
out_data[i] = value;
value += step;
}
} else {
out_data[0] = start;
}
}
};
} // namespace operators
} // namespace paddle
...@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/lookup_table_op.h" #include "paddle/fluid/operators/lookup_table_op.h"
#include <memory>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/framework/var_type_inference.h"
namespace paddle { namespace paddle {
...@@ -119,6 +123,29 @@ or not. And the output only shares the LoD information with input Ids. ...@@ -119,6 +123,29 @@ or not. And the output only shares the LoD information with input Ids.
} }
}; };
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LookupTableGradOpNoBuffer, "W");
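// The maker below forwards only the inputs lookup_table_grad actually
// reads; with the no-need-buffer declaration above, the framework may
// then free W's data buffer early during the backward pass.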
class LookupTableGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("lookup_table_grad");
op->SetInput("W", Input("W"));
op->SetInput("Ids", Input("Ids"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("W"), InputGrad("W"));
op->SetAttrMap(Attrs());
return op;
}
};
class LookupTableOpGrad : public framework::OperatorWithKernel { class LookupTableOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -131,7 +158,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { ...@@ -131,7 +158,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Out")); auto data_type = framework::GetDataTypeOfVar(
ctx.InputVar(framework::GradVarName("Out")));
return framework::OpKernelType(data_type, ctx.device_context()); return framework::OpKernelType(data_type, ctx.device_context());
} }
}; };
...@@ -159,10 +187,11 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -159,10 +187,11 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(lookup_table, ops::LookupTableOp, REGISTER_OPERATOR(lookup_table, ops::LookupTableOp, ops::LookupTableOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>, ops::LookupTableGradOpDescMaker);
ops::LookupTableOpMaker);
REGISTER_OPERATOR(lookup_table_grad, ops::LookupTableOpGrad, REGISTER_OPERATOR(lookup_table_grad, ops::LookupTableOpGrad,
ops::LookupTableGradOpNoBuffer,
ops::LookupTableOpGradVarTypeInference); ops::LookupTableOpGradVarTypeInference);
REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel<float>, REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel<float>,
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h"
REGISTER_REDUCE_OP(reduce_all);
REGISTER_OP_CPU_KERNEL(reduce_all,
ops::ReduceKernel<paddle::platform::CPUDeviceContext,
bool, ops::AllFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h"
REGISTER_OP_CUDA_KERNEL(reduce_all,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
bool, ops::AllFunctor>);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
namespace paddle {
namespace operators {
struct AllFunctor {
template <typename DeviceContext, typename X, typename Y, typename Dim>
void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) {
y->device(place) = x->all(dim);
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h"
REGISTER_REDUCE_OP(reduce_any);
REGISTER_OP_CPU_KERNEL(reduce_any,
ops::ReduceKernel<paddle::platform::CPUDeviceContext,
bool, ops::AnyFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h"
REGISTER_OP_CUDA_KERNEL(reduce_any,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
bool, ops::AnyFunctor>);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
namespace paddle {
namespace operators {
struct AnyFunctor {
template <typename DeviceContext, typename X, typename Y, typename Dim>
void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) {
y->device(place) = x->any(dim);
}
};
} // namespace operators
} // namespace paddle
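The two functors above reduce a boolean tensor with logical AND / OR over the given dimensions; in numpy terms (a cross-check only, not Paddle API):

    import numpy as np

    x = np.array([[True, False], [True, True]])
    print(np.all(x, axis=1))  # AllFunctor over dim 1 -> [False  True]
    print(np.any(x, axis=0))  # AnyFunctor over dim 0 -> [ True  True]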
...@@ -13,12 +13,18 @@ See the License for the specific language governing permissions and ...@@ -13,12 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/space_to_depth_op.h" #include "paddle/fluid/operators/space_to_depth_op.h"
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
class SpaceToDepthOp : public framework::OperatorWithKernel { class SpaceToDepthOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -100,6 +106,28 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -100,6 +106,28 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(SpaceToDepthGradOpNoBuffer, "X");
class SpaceToDepthGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("space_to_depth_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("X", Input("X"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class SpaceToDepthGradOp : public framework::OperatorWithKernel { class SpaceToDepthGradOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -110,6 +138,14 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel { ...@@ -110,6 +138,14 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel {
"Input(Out@GRAD) shouldn't be null."); "Input(Out@GRAD) shouldn't be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
} }
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
ctx.Input<Tensor>(framework::GradVarName("Out"))->type(),
ctx.GetPlace());
}
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -117,8 +153,9 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel { ...@@ -117,8 +153,9 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(space_to_depth, ops::SpaceToDepthOp, ops::SpaceToDepthOpMaker, REGISTER_OPERATOR(space_to_depth, ops::SpaceToDepthOp, ops::SpaceToDepthOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::SpaceToDepthGradOpDescMaker);
REGISTER_OPERATOR(space_to_depth_grad, ops::SpaceToDepthGradOp); REGISTER_OPERATOR(space_to_depth_grad, ops::SpaceToDepthGradOp,
ops::SpaceToDepthGradOpNoBuffer);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
space_to_depth, space_to_depth,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, float>, ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, float>,
......
...@@ -14,6 +14,10 @@ limitations under the License. */ ...@@ -14,6 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/squared_l2_distance_op.h" #include "paddle/fluid/operators/squared_l2_distance_op.h"
#include <memory>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -54,6 +58,34 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { ...@@ -54,6 +58,34 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
} }
}; };
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(SquaredL2DistanceGradOpNoBuffer, "X",
"Y");
class SquaredL2DistanceGradOpDescMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("squared_l2_distance_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("sub_result", Output("sub_result"));
op->SetInput("X", Input("X"));
op->SetInput("Y", Input("Y"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
op->SetAttrMap(Attrs());
return op;
}
};
class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
...@@ -88,6 +120,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { ...@@ -88,6 +120,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null"); "Gradient of Out should not be null");
PADDLE_ENFORCE(ctx->HasInput("sub_result"), "Input(sub_result) should not be null.");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
auto x_dims = ctx->GetInputDim("X"); auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y"); auto y_dims = ctx->GetInputDim("Y");
...@@ -102,6 +135,13 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { ...@@ -102,6 +135,13 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
if (ctx->HasOutput(x_grad_name)) ctx->SetOutputDim(x_grad_name, x_dims); if (ctx->HasOutput(x_grad_name)) ctx->SetOutputDim(x_grad_name, x_dims);
if (ctx->HasOutput(y_grad_name)) ctx->SetOutputDim(y_grad_name, y_dims); if (ctx->HasOutput(y_grad_name)) ctx->SetOutputDim(y_grad_name, y_dims);
} }
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("sub_result")->type(),
ctx.GetPlace());
}
}; };
} // namespace operators } // namespace operators
...@@ -110,8 +150,9 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { ...@@ -110,8 +150,9 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(squared_l2_distance, ops::SquaredL2DistanceOp, REGISTER_OPERATOR(squared_l2_distance, ops::SquaredL2DistanceOp,
ops::SquaredL2DistanceOpMaker, ops::SquaredL2DistanceOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::SquaredL2DistanceGradOpDescMaker);
REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp); REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp,
ops::SquaredL2DistanceGradOpNoBuffer);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
squared_l2_distance, squared_l2_distance,
ops::SquaredL2DistanceKernel<paddle::platform::CPUDeviceContext, float>); ops::SquaredL2DistanceKernel<paddle::platform::CPUDeviceContext, float>);
......
...@@ -77,6 +77,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> { ...@@ -77,6 +77,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
auto* x_g = context.Output<Tensor>(framework::GradVarName("X")); auto* x_g = context.Output<Tensor>(framework::GradVarName("X"));
auto* y_g = context.Output<Tensor>(framework::GradVarName("Y")); auto* y_g = context.Output<Tensor>(framework::GradVarName("Y"));
PADDLE_ENFORCE_NOT_NULL(x_g);
PADDLE_ENFORCE_NOT_NULL(y_g);
auto sub_result = EigenMatrix<T>::From(*in0); auto sub_result = EigenMatrix<T>::From(*in0);
auto out_grad = EigenMatrix<T>::From(*in1); auto out_grad = EigenMatrix<T>::From(*in1);
...@@ -92,31 +95,28 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> { ...@@ -92,31 +95,28 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
// propagate back to input // propagate back to input
auto& eigen_place = auto& eigen_place =
*context.template device_context<DeviceContext>().eigen_device(); *context.template device_context<DeviceContext>().eigen_device();
if (x_g) {
x_g->mutable_data<T>(context.GetPlace());
// eigen matrix
auto x_grad =
EigenMatrix<T>::From(*x_g, framework::make_ddim({x_dims[0], cols}));
// dimensions are same with subResult
x_grad.device(eigen_place) = grad_mat;
}
if (y_g) { x_g->mutable_data<T>(context.GetPlace());
y_g->mutable_data<T>(context.GetPlace()); // eigen matrix
auto x_grad =
PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0], EigenMatrix<T>::From(*x_g, framework::make_ddim({x_dims[0], cols}));
"First dimension of gradient must be greater or " // dimensions are same with subResult
"equal than first dimension of target."); x_grad.device(eigen_place) = grad_mat;
if (sub_result.dimensions()[0] == y_dims[0]) { y_g->mutable_data<T>(context.GetPlace());
auto y_grad =
EigenMatrix<T>::From(*y_g, framework::make_ddim({y_dims[0], cols})); PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0],
y_grad.device(eigen_place) = -1 * grad_mat; "First dimension of gradient must be greater or "
} else { "equal than first dimension of target.");
auto col_sum_res = -1 * (grad_mat.sum(Eigen::array<int, 1>({{0}})));
auto y_grad = EigenVector<T>::Flatten(*y_g); if (sub_result.dimensions()[0] == y_dims[0]) {
y_grad.device(eigen_place) = col_sum_res; auto y_grad =
} EigenMatrix<T>::From(*y_g, framework::make_ddim({y_dims[0], cols}));
y_grad.device(eigen_place) = -1 * grad_mat;
} else {
auto col_sum_res = -1 * (grad_mat.sum(Eigen::array<int, 1>({{0}})));
auto y_grad = EigenVector<T>::Flatten(*y_g);
y_grad.device(eigen_place) = col_sum_res;
} }
} }
}; };
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/squared_l2_norm_op.h" #include "paddle/fluid/operators/squared_l2_norm_op.h"
#include <memory>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -31,6 +33,26 @@ class SquaredL2NormOp : public framework::OperatorWithKernel { ...@@ -31,6 +33,26 @@ class SquaredL2NormOp : public framework::OperatorWithKernel {
} }
}; };
class SquaredL2NormGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("squared_l2_norm_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("X", Input("X"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class SquaredL2NormGradOp : public framework::OperatorWithKernel { class SquaredL2NormGradOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -67,8 +89,7 @@ $$Out = \sum_{i} X_{i}^2$$ ...@@ -67,8 +89,7 @@ $$Out = \sum_{i} X_{i}^2$$
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(squared_l2_norm, ops::SquaredL2NormOp, REGISTER_OPERATOR(squared_l2_norm, ops::SquaredL2NormOp,
ops::SquaredL2NormOpMaker, ops::SquaredL2NormOpMaker, ops::SquaredL2NormGradOpDescMaker);
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(squared_l2_norm_grad, ops::SquaredL2NormGradOp); REGISTER_OPERATOR(squared_l2_norm_grad, ops::SquaredL2NormGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
squared_l2_norm, squared_l2_norm,
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/teacher_student_sigmoid_loss_op.h" #include "paddle/fluid/operators/teacher_student_sigmoid_loss_op.h"
#include <memory>
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
namespace paddle { namespace paddle {
...@@ -55,6 +58,28 @@ class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { ...@@ -55,6 +58,28 @@ class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel {
} }
}; };
class TeacherStudentSigmoidLossGradOpDescMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("teacher_student_sigmoid_loss_grad");
op->SetInput("X", Input("X"));
op->SetInput("Label", Input("Label"));
op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class TeacherStudentSigmoidLossGradientOp class TeacherStudentSigmoidLossGradientOp
: public framework::OperatorWithKernel { : public framework::OperatorWithKernel {
public: public:
...@@ -148,7 +173,7 @@ namespace ops = paddle::operators; ...@@ -148,7 +173,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(teacher_student_sigmoid_loss, REGISTER_OPERATOR(teacher_student_sigmoid_loss,
ops::TeacherStudentSigmoidLossOp, ops::TeacherStudentSigmoidLossOp,
ops::TeacherStudentSigmoidLossOpMaker, ops::TeacherStudentSigmoidLossOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::TeacherStudentSigmoidLossGradOpDescMaker);
REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad, REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad,
ops::TeacherStudentSigmoidLossGradientOp); ops::TeacherStudentSigmoidLossGradientOp);
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/operators/tree_conv_op.h" #include "paddle/fluid/operators/tree_conv_op.h"
#include <memory>
#include <string> #include <string>
namespace paddle { namespace paddle {
...@@ -86,6 +88,30 @@ class TreeConvOp : public framework::OperatorWithKernel { ...@@ -86,6 +88,30 @@ class TreeConvOp : public framework::OperatorWithKernel {
} }
}; };
class TreeConvGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("tree_conv_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("Filter", Input("Filter"));
op->SetInput("EdgeSet", Input("EdgeSet"));
op->SetInput("NodesVector", Input("NodesVector"));
op->SetOutput(framework::GradVarName("NodesVector"),
InputGrad("NodesVector"));
op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
op->SetAttrMap(Attrs());
return op;
}
};
class TreeConvGradOp : public framework::OperatorWithKernel { class TreeConvGradOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -115,7 +141,7 @@ class TreeConvGradOp : public framework::OperatorWithKernel { ...@@ -115,7 +141,7 @@ class TreeConvGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(tree_conv, ops::TreeConvOp, ops::TreeConvOpMaker, REGISTER_OPERATOR(tree_conv, ops::TreeConvOp, ops::TreeConvOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::TreeConvGradOpDescMaker);
REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp); REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp);
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/warpctc_op.h" #include "paddle/fluid/operators/warpctc_op.h"
#include <memory>
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
#endif #endif
...@@ -118,6 +120,27 @@ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf). ...@@ -118,6 +120,27 @@ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf).
} }
}; };
class WarpCTCGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("warpctc_grad");
op->SetInput("WarpCTCGrad", Output("WarpCTCGrad"));
op->SetInput("Logits", Input("Logits"));
op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
op->SetAttrMap(Attrs());
return op;
}
};
class WarpCTCGradOp : public framework::OperatorWithKernel { class WarpCTCGradOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -145,7 +168,7 @@ class WarpCTCGradOp : public framework::OperatorWithKernel { ...@@ -145,7 +168,7 @@ class WarpCTCGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker, REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::WarpCTCGradOpDescMaker);
REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp); REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>); warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>);
......
...@@ -1366,6 +1366,10 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1366,6 +1366,10 @@ All parameter, weight, gradient are variables in Paddle.
"cache_runtime_context", "cache_runtime_context",
[](const BuildStrategy &self) { return self.cache_runtime_context_; }, [](const BuildStrategy &self) { return self.cache_runtime_context_; },
[](BuildStrategy &self, bool b) { self.cache_runtime_context_ = b; }) [](BuildStrategy &self, bool b) { self.cache_runtime_context_ = b; })
.def_property(
"cache_expected_kernel",
[](const BuildStrategy &self) { return self.cache_expected_kernel_; },
[](BuildStrategy &self, bool b) { self.cache_expected_kernel_ = b; })
.def("_finalize_strategy_and_create_passes", .def("_finalize_strategy_and_create_passes",
[](BuildStrategy &self) -> std::shared_ptr<ir::PassBuilder> { [](BuildStrategy &self) -> std::shared_ptr<ir::PassBuilder> {
return self.CreatePassesFromStrategy(true); return self.CreatePassesFromStrategy(true);
......
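The new property is toggled from Python the same way as the neighboring cache_runtime_context flag; a short sketch (assuming the usual fluid.BuildStrategy binding path):

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    build_strategy.cache_expected_kernel = True  # property added above
    build_strategy.cache_runtime_context = True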
...@@ -202,6 +202,7 @@ function cmake_gen() { ...@@ -202,6 +202,7 @@ function cmake_gen() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF}
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF} -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}
...@@ -234,6 +235,7 @@ EOF ...@@ -234,6 +235,7 @@ EOF
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \ -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \ -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}\ -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}\
...@@ -291,8 +293,12 @@ function build() { ...@@ -291,8 +293,12 @@ function build() {
Building in /paddle/build ... Building in /paddle/build ...
============================================ ============================================
EOF EOF
parallel_number=`nproc`
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
make clean make clean
make -j `nproc` make -j ${parallel_number}
make install -j `nproc` make install -j `nproc`
} }
...@@ -737,9 +743,13 @@ function gen_fluid_lib() { ...@@ -737,9 +743,13 @@ function gen_fluid_lib() {
Generating fluid library for train and inference ... Generating fluid library for train and inference ...
======================================== ========================================
EOF EOF
parallel_number=`nproc`
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON
make -j `nproc` fluid_lib_dist make -j ${parallel_number} fluid_lib_dist
make -j `nproc` inference_lib_dist make -j ${parallel_number} inference_lib_dist
} }
function tar_fluid_lib() { function tar_fluid_lib() {
...@@ -770,11 +780,22 @@ EOF ...@@ -770,11 +780,22 @@ EOF
function main() { function main() {
local CMD=$1 local CMD=$1
local parallel_number=$2
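# The optional second argument caps build parallelism; build() and
# gen_fluid_lib() fall back to `nproc` when it is not given.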
init init
case $CMD in case $CMD in
build_only)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
;;
build_and_check)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""}
assert_api_spec_approvals
;;
build) build)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build ${parallel_number}
gen_dockerfile ${PYTHON_ABI:-""} gen_dockerfile ${PYTHON_ABI:-""}
;; ;;
test) test)
...@@ -797,7 +818,7 @@ function main() { ...@@ -797,7 +818,7 @@ function main() {
;; ;;
fluid_inference_lib) fluid_inference_lib)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
gen_fluid_lib gen_fluid_lib ${parallel_number}
tar_fluid_lib tar_fluid_lib
test_fluid_lib test_fluid_lib
;; ;;
...@@ -806,16 +827,16 @@ function main() { ...@@ -806,16 +827,16 @@ function main() {
;; ;;
cicheck) cicheck)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""} assert_api_not_changed ${PYTHON_ABI:-""}
run_test run_test
gen_fluid_lib gen_fluid_lib ${parallel_number}
test_fluid_lib test_fluid_lib
assert_api_spec_approvals assert_api_spec_approvals
;; ;;
cicheck_brpc) cicheck_brpc)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build ${parallel_number}
run_brpc_test run_brpc_test
;; ;;
assert_api) assert_api)
...@@ -823,7 +844,7 @@ function main() { ...@@ -823,7 +844,7 @@ function main() {
assert_api_spec_approvals assert_api_spec_approvals
;; ;;
test_inference) test_inference)
gen_fluid_lib gen_fluid_lib ${parallel_number}
test_fluid_lib test_fluid_lib
;; ;;
assert_api_approvals) assert_api_approvals)
...@@ -840,7 +861,7 @@ function main() { ...@@ -840,7 +861,7 @@ function main() {
;; ;;
cicheck_py35) cicheck_py35)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build ${parallel_number}
run_test run_test
assert_api_not_changed ${PYTHON_ABI:-""} assert_api_not_changed ${PYTHON_ABI:-""}
;; ;;
...@@ -848,7 +869,7 @@ function main() { ...@@ -848,7 +869,7 @@ function main() {
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
;; ;;
gen_fluid_lib) gen_fluid_lib)
gen_fluid_lib gen_fluid_lib ${parallel_number}
;; ;;
test_fluid_lib) test_fluid_lib)
test_fluid_lib test_fluid_lib
......
...@@ -66,6 +66,8 @@ from . import compiler ...@@ -66,6 +66,8 @@ from . import compiler
from .compiler import * from .compiler import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable from paddle.fluid.layers.math_op_patch import monkey_patch_variable
from . import install_check from . import install_check
from .dygraph.nn import *
from .dygraph.layers import *
Tensor = LoDTensor Tensor = LoDTensor
......
...@@ -136,6 +136,7 @@ class DatasetBase(object): ...@@ -136,6 +136,7 @@ class DatasetBase(object):
slot_var.name = var.name slot_var.name = var.name
if var.lod_level == 0: if var.lod_level == 0:
slot_var.is_dense = True slot_var.is_dense = True
slot_var.shape.extend(var.shape)
if var.dtype == core.VarDesc.VarType.FP32: if var.dtype == core.VarDesc.VarType.FP32:
slot_var.type = "float" slot_var.type = "float"
elif var.dtype == core.VarDesc.VarType.INT64: elif var.dtype == core.VarDesc.VarType.INT64:
......
...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable'] ...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def enabled(): def enabled():
return framework._in_dygraph_mode() return framework.in_dygraph_mode()
@signature_safe_contextmanager @signature_safe_contextmanager
......
...@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None): ...@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
my_layer = layer(fluid.dygraph.Layer) my_layer = layer(fluid.Layer)
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path) param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1'] param_1 = param_dict['PtbModel_0.w_1']
or:
my_layer = layer(fluid.dygraph.Layer)
param_path = "./my_paddle_model"
filename = "model.file"
param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path,
filename=filename)
param_1 = param_dict['PtbModel_0.w_1']
""" """
if isinstance(vardict, collections.OrderedDict): if isinstance(vardict, collections.OrderedDict):
return _load_var_from_file(vardict, dirname, filename) return _load_var_from_file(vardict, dirname, filename)
......
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import copy import copy
import six import six
from ..framework import Parameter, _in_dygraph_mode from ..framework import Parameter, in_dygraph_mode
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .. import core from .. import core
from six.moves import zip from six.moves import zip
......
...@@ -139,14 +139,14 @@ class Layer(core.Layer): ...@@ -139,14 +139,14 @@ class Layer(core.Layer):
def clear_gradients(self): def clear_gradients(self):
for p in self.parameters(): for p in self.parameters():
p._clear_gradient() p.clear_gradient()
def _build_once(self, *args): def build_once(self, *args):
pass pass
def __call__(self, *inputs): def __call__(self, *inputs):
if not self._built: if not self._built:
self._build_once(*inputs) self.build_once(*inputs)
outputs = self.forward(*inputs) outputs = self.forward(*inputs)
self._built = True self._built = True
......
...@@ -19,7 +19,7 @@ from six.moves import reduce ...@@ -19,7 +19,7 @@ from six.moves import reduce
from .. import core from .. import core
from ..layers import utils from ..layers import utils
from . import layers from . import layers
from ..framework import Variable, _in_dygraph_mode, OpProtoHolder, Parameter from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
import numpy as np import numpy as np
...@@ -33,6 +33,109 @@ __all__ = [ ...@@ -33,6 +33,109 @@ __all__ = [
class Conv2D(layers.Layer): class Conv2D(layers.Layer):
"""
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input image channels divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If a bias attribute and an activation type are provided, the bias is added
to the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the number of
output image channels.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended.
Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None
Returns:
Variable: The tensor variable storing the convolution and \
non-linearity activation result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -265,7 +368,7 @@ class Conv3D(layers.Layer): ...@@ -265,7 +368,7 @@ class Conv3D(layers.Layer):
self._param_attr = param_attr self._param_attr = param_attr
self._bias_attr = bias_attr self._bias_attr = bias_attr
def _build_once(self, input): def build_once(self, input):
num_channels = input.shape[1] num_channels = input.shape[1]
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
...@@ -332,6 +435,116 @@ class Conv3D(layers.Layer): ...@@ -332,6 +435,116 @@ class Conv3D(layers.Layer):
class Conv3DTranspose(layers.Layer): class Conv3DTranspose(layers.Layer):
"""
**Convolution3D transpose layer**
The convolution3D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCDHW format. Where N is batch size, C is the number of channels,
D is the depth of the feature, H is the height of the feature, and W
is the width of the feature. Parameters (dilations, strides, paddings) are
three elements, which represent depth, height, and width, respectively.
For the details of the convolution transpose layer, please refer to the
following explanation and the references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
If a bias attribute and an activation type are provided, the bias is added
to the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
Args:
input(Variable): The input image with [N, C, D, H, W] format.
num_filters(int): The number of filters. It is the same as the number of
output image channels.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain three integers, (image_D, image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square. If None, filter_size is
calculated from output_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution transpose result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import base, nn

with fluid.dygraph.guard():
    input_array = np.random.rand(1, 3, 12, 32, 32).astype('float32')  # illustrative shape
    conv3d_transpose = nn.Conv3DTranspose(
        'Conv3DTranspose',
        num_filters=12,
        filter_size=12,
        use_cudnn=False)
    transpose_res = conv3d_transpose(base.to_variable(input_array))
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_filters, num_filters,
...@@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer): ...@@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._act = act self._act = act
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
self._input_channel = input.shape[1] self._input_channel = input.shape[1]
...@@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer): ...@@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer):
class Pool2D(layers.Layer): class Pool2D(layers.Layer):
"""
${comment}
Args:
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the
feature, and W is the width of the feature.
pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square with side length pool_size.
pool_type: ${pooling_type_comment}
pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square with side length pool_stride.
pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
    it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width).
    Otherwise, the pool padding size will be a square with side length pool_padding.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
exclusive (bool): Whether to exclude padding points in average pooling
    mode. Default: True.
Returns:
Variable: The pooling result.
Raises:
ValueError: If 'pool_type' is neither "max" nor "avg".
ValueError: If 'global_pooling' is False and 'pool_size' is -1
ValueError: If 'use_cudnn' is not a bool value.
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
pool2d = fluid.Pool2D("pool2d",pool_size=2,
pool_type='max',
pool_stride=1,
global_pooling=False)
pool2d_res = pool2d(data)
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
pool_size=-1, pool_size=-1,
...@@ -495,6 +756,102 @@ class Pool2D(layers.Layer): ...@@ -495,6 +756,102 @@ class Pool2D(layers.Layer):
class FC(layers.Layer): class FC(layers.Layer):
"""
**Fully Connected Layer**
This function creates a fully connected layer in the network. It can take
one or multiple tensors as its inputs (input can be a list of Variable; see
Args for details). It creates a variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [M, `size`],
where M is batch size. If multiple input tensors are given, the resulting
output tensors, each with shape [M, `size`], will be summed up. If bias_attr
is not None, a bias variable will be created and added to the output.
Finally, if activation is not None, it will be applied to the output as well.
When the input is a single tensor:
.. math::
Out = Act({XW + b})
When the input consists of multiple tensors:
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
* :math:`N`: The number of inputs. N equals len(input) if input is a list of Variable.
* :math:`X_i`: The i-th input tensor.
* :math:`W_i`: The i-th weight matrix, corresponding to the i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output tensor.
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2],
[0.3, 0.4]]]
data_1.shape = (1, 2, 2) # 1 is batch_size
data_2.data = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3)
out = fluid.layers.fc(input=[data_1, data_2], size=2)
Then:
out.data = [[0.18669507, 0.1893476]]
out.shape = (1, 2)
Args:
input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
the input tensor(s) is at least 2.
size(int): The number of output units in this layer.
num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multidimensional tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flattened to form the first dimension of the final matrix (height of
    the matrix), and the remaining `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
parameters/weights of this layer.
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, default None): Activation to be applied to the output of this layer.
is_test(bool): A flag indicating whether execution is in test phase.
name (str, default None): The name of this layer.
Returns:
Variable: The transformation result.
Raises:
ValueError: If rank of the input tensor is less than 2.
Examples:
.. code-block:: python
# when input is single tensor
data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.FC("fc", size=1000, act="tanh")
fc_res = fc(data)
# when input are multiple tensors
data_1 = fluid.layers.data(name="data_1", shape=[32, 32], dtype="float32")
data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32")
fc = fluid.FC("fc", size=1000, act="tanh")
fc_res = fc([data_1, data_2])
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
size, size,
...@@ -522,7 +879,7 @@ class FC(layers.Layer): ...@@ -522,7 +879,7 @@ class FC(layers.Layer):
assert isinstance(value, Parameter) assert isinstance(value, Parameter)
self.__w[i] = value self.__w[i] = value
def _build_once(self, input): def build_once(self, input):
i = 0 i = 0
for inp, param in self._helper.iter_inputs_and_params(input, for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr): self._param_attr):
...@@ -591,6 +948,91 @@ class FC(layers.Layer): ...@@ -591,6 +948,91 @@ class FC(layers.Layer):
class BatchNorm(layers.Layer): class BatchNorm(layers.Layer):
"""
**Batch Normalization Layer**
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::
\\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
\ mini-batch\ mean \\\\
\\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
\\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
When use_global_stats = True, the :math:`\\mu_{\\beta}`
and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
They are global (or running) statistics, usually obtained from a
pre-trained model.
The training and testing (or inference) have the same behavior:
.. math::
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta
Args:
input(variable): The rank of input variable can be 2, 3, 4, 5.
act(string, Default None): Activation type, linear|relu|prelu|...
is_test (bool, Default False): A flag indicating whether it is in
test phase or not.
momentum(float, Default 0.9): The value used for the moving_mean and
moving_var computation. The updated formula is:
:math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
:math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
moving_mean_name(string, Default None): The name of moving_mean which stores the global Mean.
moving_variance_name(string, Default None): The name of moving_variance which stores the global Variance.
do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
fuse_with_relu (bool): if True, this OP performs relu after batch norm.
use_global_stats(bool, Default False): Whether to use global mean and
    variance. In inference or test mode, setting use_global_stats to True
    or is_test to True gives equivalent behavior. In train mode, when
    use_global_stats is True, the global mean and variance are also used
    during training.
Returns:
Variable: A tensor variable which is the result after applying batch normalization on the input.
Examples:
.. code-block:: python
fc = fluid.FC('fc', size=200, param_attr='fc1.w')
hidden1 = fc(x)  # x: an input variable defined elsewhere
batch_norm = fluid.BatchNorm("batch_norm", 10)
hidden2 = batch_norm(hidden1)
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer): ...@@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype, dtype=self._dtype,
default_initializer=Constant(1.0)) default_initializer=Constant(1.0))
if use_global_stats and self._param_attr.learning_rate == 0.: if use_global_stats and self._param_attr.learning_rate == 0.:
self._scale._stop_gradient = True self._scale.stop_gradient = True
self._bias = self.create_parameter( self._bias = self.create_parameter(
attr=self._param_attr, attr=self._param_attr,
...@@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer): ...@@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype, dtype=self._dtype,
is_bias=True) is_bias=True)
if use_global_stats and self._param_attr.learning_rate == 0.: if use_global_stats and self._param_attr.learning_rate == 0.:
self._bias._stop_gradient = True self._bias.stop_gradient = True
self._mean = self.create_parameter( self._mean = self.create_parameter(
attr=ParamAttr( attr=ParamAttr(
...@@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer): ...@@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var), do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=self._dtype) dtype=self._dtype)
self._mean._stop_gradient = True self._mean.stop_gradient = True
self._variance = self.create_parameter( self._variance = self.create_parameter(
attr=ParamAttr( attr=ParamAttr(
...@@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer): ...@@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var), do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=self._dtype) dtype=self._dtype)
self._variance._stop_gradient = True self._variance.stop_gradient = True
self._in_place = in_place self._in_place = in_place
self._momentum = momentum self._momentum = momentum
...@@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer): ...@@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer):
self._fuse_with_relu = fuse_with_relu self._fuse_with_relu = fuse_with_relu
self._use_global_stats = use_global_stats self._use_global_stats = use_global_stats
def _build_once(self, input): def build_once(self, input):
pass pass
def forward(self, input): def forward(self, input):
...@@ -747,7 +1189,7 @@ class Embedding(layers.Layer): ...@@ -747,7 +1189,7 @@ class Embedding(layers.Layer):
dict_size = len(dataset.ids) dict_size = len(dataset.ids)
input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
embedding = fluid.dygraph.Embedding(size=[dict_size, 16]) embedding = fluid.Embedding(size=[dict_size, 16])
fc = embedding(input) fc = embedding(input)
""" """
...@@ -797,70 +1239,70 @@ class Embedding(layers.Layer): ...@@ -797,70 +1239,70 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer): class LayerNorm(layers.Layer):
def __init__(self, """
name_scope, ${comment}
scale=True,
shift=True,
begin_norm_axis=1,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None):
"""
${comment}
The formula is as follows: The formula is as follows:
.. math:: .. math::
\\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
\\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2} \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2}
h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
* :math:`a`: the vector representation of the summed inputs to the neurons * :math:`a`: the vector representation of the summed inputs to the neurons
in that layer. in that layer.
* :math:`H`: the number of hidden units in a layer * :math:`H`: the number of hidden units in a layer
* :math:`g`: the trainable scale parameter. * :math:`g`: the trainable scale parameter.
* :math:`b`: the trainable bias parameter. * :math:`b`: the trainable bias parameter.
Args: Args:
input(Variable): The input tensor variable. input(Variable): The input tensor variable.
scale(bool): Whether to learn the adaptive gain :math:`g` after scale(bool): Whether to learn the adaptive gain :math:`g` after
normalization. Default True. normalization. Default True.
shift(bool): Whether to learn the adaptive bias :math:`b` after shift(bool): Whether to learn the adaptive bias :math:`b` after
normalization. Default True. normalization. Default True.
begin_norm_axis(int): The normalization will be performed along begin_norm_axis(int): The normalization will be performed along
dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
Default 1. Default 1.
epsilon(float): The small value added to the variance to prevent epsilon(float): The small value added to the variance to prevent
division by zero. Default 1e-05. division by zero. Default 1e-05.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
omitted. If :attr:`scale` is True and :attr:`param_attr` is None, omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as scale. The a default :code:`ParamAttr` would be added as scale. The
:attr:`param_attr` is initialized as 1 if it is added. Default None. :attr:`param_attr` is initialized as 1 if it is added. Default None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
omitted. If :attr:`shift` is True and :attr:`param_attr` is None, omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as bias. The a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default None. :attr:`bias_attr` is initialized as 0 if it is added. Default None.
act(str): Activation to be applied to the output of layer normalization. act(str): Activation to be applied to the output of layer normalization.
Default None. Default None.
Returns: Returns:
${y_comment} ${y_comment}
Examples: Examples:
>>> data = fluid.layers.data(name='data', shape=[3, 32, 32], >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
>>> dtype='float32') >>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
""" """
def __init__(self,
name_scope,
scale=True,
shift=True,
begin_norm_axis=1,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None):
super(LayerNorm, self).__init__(name_scope) super(LayerNorm, self).__init__(name_scope)
self._scale = scale self._scale = scale
self._shift = shift self._shift = shift
...@@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer): ...@@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._act = act self._act = act
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
input_shape = input.shape input_shape = input.shape
param_shape = [ param_shape = [
...@@ -1232,7 +1674,7 @@ class NCE(layers.Layer): ...@@ -1232,7 +1674,7 @@ class NCE(layers.Layer):
'remote_prefetch': remote_prefetch 'remote_prefetch': remote_prefetch
} }
def _build_once(self, input, label, sample_weight=None): def build_once(self, input, label, sample_weight=None):
assert isinstance(input, Variable) assert isinstance(input, Variable)
assert isinstance(label, Variable) assert isinstance(label, Variable)
...@@ -1318,7 +1760,7 @@ class PRelu(layers.Layer): ...@@ -1318,7 +1760,7 @@ class PRelu(layers.Layer):
raise ValueError('mode should be one of all, channel, element.') raise ValueError('mode should be one of all, channel, element.')
self._alpha_shape = [1] self._alpha_shape = [1]
def _build_once(self, input): def build_once(self, input):
if self._mode == 'channel': if self._mode == 'channel':
self._alpha_shape = [1, input.shape[1], 1, 1] self._alpha_shape = [1, input.shape[1], 1, 1]
elif self._mode == 'element': elif self._mode == 'element':
...@@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer): ...@@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer):
self._name = name self._name = name
self._inputs = dict() self._inputs = dict()
def _build_once(self, x, y): def build_once(self, x, y):
self._dtype = self._helper.input_dtype(x) self._dtype = self._helper.input_dtype(x)
param_shape = [self._size, x.shape[1], y.shape[1]] param_shape = [self._size, x.shape[1], y.shape[1]]
...@@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer): ...@@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer):
self._output_size = output_size self._output_size = output_size
self._op_type = 'conv2d_transpose' self._op_type = 'conv2d_transpose'
def _build_once(self, input): def build_once(self, input):
input_channel = input.shape[1] input_channel = input.shape[1]
if (input_channel == self._groups and if (input_channel == self._groups and
self._num_filters == input_channel and not self._use_cudnn): self._num_filters == input_channel and not self._use_cudnn):
...@@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer): ...@@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer):
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
act=None): act=None):
assert not _in_dygraph_mode( assert not in_dygraph_mode(
), "SequenceConv is not supported by dynamic graph mode yet!" ), "SequenceConv is not supported by dynamic graph mode yet!"
super(SequenceConv, self).__init__(name_scope) super(SequenceConv, self).__init__(name_scope)
self._num_filters = num_filters self._num_filters = num_filters
...@@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer): ...@@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._param_attr = param_attr self._param_attr = param_attr
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
filter_shape = [self._filter_size * input.shape[1], self._num_filters] filter_shape = [self._filter_size * input.shape[1], self._num_filters]
self._filter_param = self.create_parameter( self._filter_param = self.create_parameter(
...@@ -1726,14 +2168,14 @@ class RowConv(layers.Layer): ...@@ -1726,14 +2168,14 @@ class RowConv(layers.Layer):
future_context_size, future_context_size,
param_attr=None, param_attr=None,
act=None): act=None):
assert not _in_dygraph_mode( assert not in_dygraph_mode(
), "RowConv is not supported by dynamic graph mode yet!" ), "RowConv is not supported by dynamic graph mode yet!"
super(RowConv, self).__init__(name_scope) super(RowConv, self).__init__(name_scope)
self._act = act self._act = act
self._param_attr = param_attr self._param_attr = param_attr
self._future_context_size = future_context_size self._future_context_size = future_context_size
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
filter_shape = [self._future_context_size + 1, input.shape[1]] filter_shape = [self._future_context_size + 1, input.shape[1]]
self._filter_param = self.create_parameter( self._filter_param = self.create_parameter(
...@@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer): ...@@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer):
if data_layout != 'NCHW': if data_layout != 'NCHW':
raise ValueError("unsupported data layout:" + data_layout) raise ValueError("unsupported data layout:" + data_layout)
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
param_shape = [input.shape[1]] param_shape = [input.shape[1]]
if self._bias_attr: if self._bias_attr:
...@@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer): ...@@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer):
self._eps = eps self._eps = eps
self._dim = dim self._dim = dim
def _build_once(self, weight): def build_once(self, weight):
self._dtype = self._helper.input_dtype(weight) self._dtype = self._helper.input_dtype(weight)
input_shape = weight.shape input_shape = weight.shape
h = input_shape[self._dim] h = input_shape[self._dim]
...@@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer): ...@@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._param_attr = param_attr self._param_attr = param_attr
def _build_once(self, nodes_vector, edge_set): def build_once(self, nodes_vector, edge_set):
assert isinstance(nodes_vector, Variable) assert isinstance(nodes_vector, Variable)
assert isinstance(edge_set, Variable) assert isinstance(edge_set, Variable)
self._dtype = self._helper.input_dtype(nodes_vector) self._dtype = self._helper.input_dtype(nodes_vector)
......
...@@ -712,10 +712,6 @@ class Executor(object): ...@@ -712,10 +712,6 @@ class Executor(object):
if dataset == None: if dataset == None:
raise RuntimeError("dataset is needed and should be initialized") raise RuntimeError("dataset is needed and should be initialized")
if not isinstance(self.place, core.CPUPlace):
raise RuntimeError("infer_from_dataset is verified on CPUPlace"
"We will open CUDAPlace in the future")
scope, trainer = self._prepare_trainer( scope, trainer = self._prepare_trainer(
program=program, program=program,
dataset=dataset, dataset=dataset,
...@@ -796,10 +792,6 @@ class Executor(object): ...@@ -796,10 +792,6 @@ class Executor(object):
if dataset == None: if dataset == None:
raise RuntimeError("dataset is need and should be initialized") raise RuntimeError("dataset is need and should be initialized")
if not isinstance(self.place, core.CPUPlace):
raise RuntimeError("train_from_dataset is verified on CPUPlace"
"We will open CUDAPlace in the future")
scope, trainer = self._prepare_trainer( scope, trainer = self._prepare_trainer(
program=program, program=program,
dataset=dataset, dataset=dataset,
......
...@@ -67,6 +67,7 @@ __all__ = [ ...@@ -67,6 +67,7 @@ __all__ = [
'cuda_places', 'cuda_places',
'cpu_places', 'cpu_places',
'cuda_pinned_places', 'cuda_pinned_places',
'in_dygraph_mode',
] ]
EMPTY_VAR_NAME = core.kEmptyVarName() EMPTY_VAR_NAME = core.kEmptyVarName()
...@@ -79,7 +80,10 @@ _dygraph_tracer_ = None ...@@ -79,7 +80,10 @@ _dygraph_tracer_ = None
_dygraph_current_expected_place_ = None _dygraph_current_expected_place_ = None
def _in_dygraph_mode(): def in_dygraph_mode():
'''
Returns (bool): True if the program is running in dynamic graph mode.
'''
return _dygraph_tracer_ is not None return _dygraph_tracer_ is not None
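A minimal usage sketch of the now-public helper (assuming the `fluid.dygraph.guard` context manager from this release):

import paddle.fluid as fluid

print(fluid.in_dygraph_mode())      # False: no tracer outside a guard
with fluid.dygraph.guard():
    print(fluid.in_dygraph_mode())  # True: a dygraph tracer is active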
...@@ -396,7 +400,7 @@ class Variable(object): ...@@ -396,7 +400,7 @@ class Variable(object):
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
if _in_dygraph_mode(): if in_dygraph_mode():
# record vars in tracer rather than blocks # record vars in tracer rather than blocks
self._ivar = kwargs.get("ivar", None) self._ivar = kwargs.get("ivar", None)
if not self._ivar: if not self._ivar:
...@@ -482,21 +486,21 @@ class Variable(object): ...@@ -482,21 +486,21 @@ class Variable(object):
self.block.vars[name] = self self.block.vars[name] = self
self.op = None self.op = None
self.stop_gradient = stop_gradient self._stop_gradient = stop_gradient
self.is_data = is_data self.is_data = is_data
def _numpy(self): def numpy(self):
new_ivar = self._ivar._copy_to(core.CPUPlace(), True) new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor()) return np.array(new_ivar.value().get_tensor())
def _backward(self): def backward(self):
self._ivar._run_backward() self._ivar._run_backward()
def _gradient(self): def gradient(self):
new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True) new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor()) return np.array(new_ivar.value().get_tensor())
def _clear_gradient(self): def clear_gradient(self):
self._ivar._clear_gradient() self._ivar._clear_gradient()
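A hedged sketch of the renamed public Variable methods above (numpy/backward/gradient/clear_gradient); that reduce_sum traces correctly under dygraph at this commit is an assumption:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.ones([2, 2], dtype='float32'))
    x.stop_gradient = False        # uses the renamed public property
    y = fluid.layers.reduce_sum(x)
    y.backward()                   # was y._backward()
    print(x.gradient())            # was x._gradient(); all ones for a sum
    x.clear_gradient()             # was x._clear_gradient()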
def __str__(self): def __str__(self):
...@@ -516,7 +520,7 @@ class Variable(object): ...@@ -516,7 +520,7 @@ class Variable(object):
Returns: Returns:
str: The debug string. str: The debug string.
""" """
if _in_dygraph_mode(): if in_dygraph_mode():
# TODO(panyx0718): add more dygraph debug info. # TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype, return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype,
self.shape) self.shape)
...@@ -535,7 +539,7 @@ class Variable(object): ...@@ -535,7 +539,7 @@ class Variable(object):
__repr__ = __str__ __repr__ = __str__
def _set_desc(self, input): def set_desc(self, input):
""" """
Set the variable description. Set the variable description.
...@@ -548,43 +552,43 @@ class Variable(object): ...@@ -548,43 +552,43 @@ class Variable(object):
self.desc = input self.desc = input
@property @property
def _stop_gradient(self): def stop_gradient(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.stop_gradient return self._ivar.stop_gradient
else: else:
return self.stop_gradient return self._stop_gradient
@_stop_gradient.setter @stop_gradient.setter
def _stop_gradient(self, s): def stop_gradient(self, s):
if _in_dygraph_mode(): if in_dygraph_mode():
self._ivar.stop_gradient = s self._ivar.stop_gradient = s
else: else:
self.stop_gradient = s self._stop_gradient = s
@property @property
def persistable(self): def persistable(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
return self.desc.persistable() return self.desc.persistable()
@persistable.setter @persistable.setter
def persistable(self, p): def persistable(self, p):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
self.desc.set_persistable(p) self.desc.set_persistable(p)
@property @property
def name(self): def name(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.name return self._ivar.name
else: else:
return cpt.to_text(self.desc.name()) return cpt.to_text(self.desc.name())
@name.setter @name.setter
def name(self, new_name): def name(self, new_name):
if _in_dygraph_mode(): if in_dygraph_mode():
self._ivar.name = new_name self._ivar.name = new_name
else: else:
self.desc.set_name(new_name) self.desc.set_name(new_name)
...@@ -592,14 +596,14 @@ class Variable(object): ...@@ -592,14 +596,14 @@ class Variable(object):
@property @property
def shape(self): def shape(self):
# convert to tuple, make it as same as numpy API. # convert to tuple, make it as same as numpy API.
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.shape return self._ivar.shape
else: else:
return tuple(self.desc.shape()) return tuple(self.desc.shape())
@property @property
def dtype(self): def dtype(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.dtype() return self.desc.dtype()
...@@ -611,7 +615,7 @@ class Variable(object): ...@@ -611,7 +615,7 @@ class Variable(object):
@property @property
def type(self): def type(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.type() return self.desc.type()
...@@ -721,7 +725,7 @@ class Variable(object): ...@@ -721,7 +725,7 @@ class Variable(object):
name=unique_name.generate(".".join(self.name)), name=unique_name.generate(".".join(self.name)),
dtype=self.dtype, dtype=self.dtype,
persistable=self.persistable, persistable=self.persistable,
stop_gradient=self._stop_gradient, ) stop_gradient=self.stop_gradient, )
else: else:
return self return self
...@@ -930,7 +934,7 @@ class Operator(object): ...@@ -930,7 +934,7 @@ class Operator(object):
inputs=None, inputs=None,
outputs=None, outputs=None,
attrs=None): attrs=None):
if _in_dygraph_mode(): if in_dygraph_mode():
if type is None: if type is None:
raise ValueError( raise ValueError(
"`type` to initialized an Operator can not be None.") "`type` to initialized an Operator can not be None.")
...@@ -1049,7 +1053,7 @@ class Operator(object): ...@@ -1049,7 +1053,7 @@ class Operator(object):
for arg in out_args: for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name)) out_arg_names.append(cpt.to_text(arg.name))
# TODO(minqiyang): could we remove variable's op in static mode? # TODO(minqiyang): could we remove variable's op in static mode?
if not _in_dygraph_mode(): if not in_dygraph_mode():
arg.op = self arg.op = self
self.desc.set_output(out_proto.name, out_arg_names) self.desc.set_output(out_proto.name, out_arg_names)
...@@ -1095,7 +1099,7 @@ class Operator(object): ...@@ -1095,7 +1099,7 @@ class Operator(object):
@property @property
def type(self): def type(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self.iop.type return self.iop.type
else: else:
return self.desc.type() return self.desc.type()
...@@ -1638,7 +1642,7 @@ class Block(object): ...@@ -1638,7 +1642,7 @@ class Block(object):
Returns: Returns:
Operator: the append Operator. Operator: the append Operator.
""" """
if _in_dygraph_mode(): if in_dygraph_mode():
op = Operator( op = Operator(
block=self, block=self,
desc=None, desc=None,
...@@ -1710,7 +1714,7 @@ class Block(object): ...@@ -1710,7 +1714,7 @@ class Block(object):
return self.ops[start:end] return self.ops[start:end]
def _prepend_op(self, *args, **kwargs): def _prepend_op(self, *args, **kwargs):
if _in_dygraph_mode(): if in_dygraph_mode():
op = Operator( op = Operator(
self, self,
None, None,
......
...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer): ...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu() 'force_cpu': self._force_cpu or force_init_on_cpu()
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer): ...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer): ...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer): ...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer): ...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer): ...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer): ...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape), 'shape': list(shape),
value_name: values value_name: values
}) })
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer): ...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values value_name: values
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import six import six
from .framework import Parameter, dtype_is_floating, _in_dygraph_mode from .framework import Parameter, dtype_is_floating, in_dygraph_mode
from . import unique_name from . import unique_name
from paddle.fluid.initializer import Constant, Xavier from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr from .param_attr import ParamAttr
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import numpy as np import numpy as np
from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place
from . import unique_name from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
from . import core from . import core
...@@ -54,7 +54,7 @@ class LayerHelperBase(object): ...@@ -54,7 +54,7 @@ class LayerHelperBase(object):
Return a Variable constructed from value Return a Variable constructed from value
""" """
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
assert _in_dygraph_mode( assert in_dygraph_mode(
), "to_variable could only be called in dygraph mode" ), "to_variable could only be called in dygraph mode"
if not block: if not block:
...@@ -302,7 +302,7 @@ class LayerHelperBase(object): ...@@ -302,7 +302,7 @@ class LayerHelperBase(object):
param = self._create_weight_normalize(attr, shape, dtype) param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param) WeightNormParamAttr.params_with_weight_norm.append(param)
return param return param
if _in_dygraph_mode(): if in_dygraph_mode():
# In dygraph mode, we want the returned parameter to be # In dygraph mode, we want the returned parameter to be
# initialized so that it can be used imperatively. # initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
...@@ -370,7 +370,7 @@ class LayerHelperBase(object): ...@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use initializer: initializer to use
""" """
assert isinstance(var, Variable) assert isinstance(var, Variable)
if _in_dygraph_mode(): if in_dygraph_mode():
initializer(var, var.block) initializer(var, var.block)
else: else:
self.startup_program.global_block().create_var( self.startup_program.global_block().create_var(
......
...@@ -35,8 +35,8 @@ from ..dygraph import learning_rate_scheduler as imperate_lr ...@@ -35,8 +35,8 @@ from ..dygraph import learning_rate_scheduler as imperate_lr
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS', 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'cosine_decay',
'cosine_decay', 'linear_lr_warmup' 'linear_lr_warmup'
] ]
...@@ -349,24 +349,26 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): ...@@ -349,24 +349,26 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
training progresses. By using this function, the learning rate will be decayed by training progresses. By using this function, the learning rate will be decayed by
following cosine decay strategy. following cosine decay strategy.
decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1) .. math::
decayed\_lr = learning\_rate * 0.5 * (\\cos(epoch * \\frac{\\pi}{epochs}) + 1)
Args: Args:
learning_rate(Variable|float): The initial learning rate. learning_rate(Variable|float): The initial learning rate.
step_each_epoch(int): the number of steps in an epoch. step_each_epoch(int): the number of steps in an epoch.
epochs(int): the number of epochs. epochs(int): the number of epochs.
Returns: Returns:
Variable: The decayed learning rate. Variable: The decayed learning rate.
Examples:
..code-block:: python Examples:
.. code-block:: python
base_lr = 0.1 base_lr = 0.1
lr = fluid.layers.cosine_decay( lr = fluid.layers.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120) learning_rate = base_lr, step_each_epoch=10000, epochs=120)
""" """
with default_main_program()._lr_schedule_guard(): with default_main_program()._lr_schedule_guard():
if imperative_base.enabled(): if imperative_base.enabled():
decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch, decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch,
...@@ -381,50 +383,6 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): ...@@ -381,50 +383,6 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
return decayed_lr return decayed_lr
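A numeric sketch of the corrected cosine-decay formula above (base_lr and epoch counts are illustrative):

import math

base_lr, epochs = 0.1, 120
for epoch in (0, 60, 120):
    print(base_lr * 0.5 * (math.cos(epoch * math.pi / epochs) + 1))
# 0.1 at epoch 0, 0.05 halfway through, 0.0 at the final epoch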
def append_LARS(params_grads, learning_rate, weight_decay):
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
Args:
learning_rate: A learning rate Variable. This
is the global learning rate for LARS.
weight_decay: A Python `float` number.
Returns:
The decayed learning rate
Examples:
.. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
assert not imperative_base.enabled(
), "append_LARS is NOT supported in dygraph mode now"
def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0:
return grad_norm + param_norm
else:
return grad_norm + weight_decay * param_norm
for param, grad in params_grads:
with param.block.program.optimized_guard(
[param, grad]), name_scope("optimizer"):
param_lr = param.optimize_attr['learning_rate']
param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
if type(param_lr) == float and param_lr == 1.0:
decayed_lr = learning_rate * param_norm \
/ _balanced_weight(param_norm, grad_norm)
else:
decayed_lr = learning_rate * param_lr * param_norm \
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr
def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
""" """
Applies linear learning rate warmup before the normal learning rate Applies linear learning rate warmup before the normal learning rate
......
...@@ -23,7 +23,7 @@ import os ...@@ -23,7 +23,7 @@ import os
import inspect import inspect
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, _in_dygraph_mode from ..framework import Variable, OpProtoHolder, in_dygraph_mode
from ..dygraph import base from ..dygraph import base
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
...@@ -73,6 +73,8 @@ __all__ = [ ...@@ -73,6 +73,8 @@ __all__ = [
'reduce_max', 'reduce_max',
'reduce_min', 'reduce_min',
'reduce_prod', 'reduce_prod',
'reduce_all',
'reduce_any',
'sequence_first_step', 'sequence_first_step',
'sequence_last_step', 'sequence_last_step',
'sequence_slice', 'sequence_slice',
...@@ -159,6 +161,7 @@ __all__ = [ ...@@ -159,6 +161,7 @@ __all__ = [
'sum', 'sum',
'slice', 'slice',
'shape', 'shape',
'rank',
'logical_and', 'logical_and',
'logical_or', 'logical_or',
'logical_xor', 'logical_xor',
...@@ -482,7 +485,7 @@ def dynamic_lstm(input, ...@@ -482,7 +485,7 @@ def dynamic_lstm(input,
forward, _ = fluid.layers.dynamic_lstm( forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False) input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
""" """
assert _in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use lstm instead of dynamic_lstm in dygraph mode!" ) is not True, "please use lstm instead of dynamic_lstm in dygraph mode!"
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp." assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstm', **locals()) helper = LayerHelper('lstm', **locals())
...@@ -868,7 +871,7 @@ def dynamic_lstmp(input, ...@@ -868,7 +871,7 @@ def dynamic_lstmp(input,
proj_activation="tanh") proj_activation="tanh")
""" """
assert _in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use lstm instead of dynamic_lstmp in dygraph mode!" ) is not True, "please use lstm instead of dynamic_lstmp in dygraph mode!"
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp." assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
...@@ -1042,7 +1045,7 @@ def dynamic_gru(input, ...@@ -1042,7 +1045,7 @@ def dynamic_gru(input,
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim) hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
""" """
assert _in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use gru instead of dynamic_gru in dygraph mode!" ) is not True, "please use gru instead of dynamic_gru in dygraph mode!"
helper = LayerHelper('gru', **locals()) helper = LayerHelper('gru', **locals())
...@@ -1761,7 +1764,7 @@ def sequence_conv(input, ...@@ -1761,7 +1764,7 @@ def sequence_conv(input,
Variable: output of sequence_conv Variable: output of sequence_conv
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_conv', **locals()) helper = LayerHelper('sequence_conv', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -1822,7 +1825,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): ...@@ -1822,7 +1825,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
dtype='float32', lod_level=1) dtype='float32', lod_level=1)
x_sequence_softmax = fluid.layers.sequence_softmax(input=x) x_sequence_softmax = fluid.layers.sequence_softmax(input=x)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_softmax', **locals()) helper = LayerHelper('sequence_softmax', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -2316,7 +2319,7 @@ def sequence_pool(input, pool_type, is_test=False): ...@@ -2316,7 +2319,7 @@ def sequence_pool(input, pool_type, is_test=False):
last_x = fluid.layers.sequence_pool(input=x, pool_type='last') last_x = fluid.layers.sequence_pool(input=x, pool_type='last')
first_x = fluid.layers.sequence_pool(input=x, pool_type='first') first_x = fluid.layers.sequence_pool(input=x, pool_type='first')
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_pool', **locals()) helper = LayerHelper('sequence_pool', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -2357,7 +2360,7 @@ def sequence_concat(input, name=None): ...@@ -2357,7 +2360,7 @@ def sequence_concat(input, name=None):
out = fluid.layers.sequence_concat(input=[seq1, seq2, seq3]) out = fluid.layers.sequence_concat(input=[seq1, seq2, seq3])
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_concat', **locals()) helper = LayerHelper('sequence_concat', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
...@@ -2486,7 +2489,7 @@ def sequence_slice(input, offset, length, name=None): ...@@ -2486,7 +2489,7 @@ def sequence_slice(input, offset, length, name=None):
subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset, subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset,
length=length) length=length)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper("sequence_slice", **locals()) helper = LayerHelper("sequence_slice", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -3308,7 +3311,7 @@ def layer_norm(input, ...@@ -3308,7 +3311,7 @@ def layer_norm(input,
>>> dtype='float32') >>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
""" """
assert _in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!" ) is not True, "please use FC instead of fc in dygraph mode!"
helper = LayerHelper('layer_norm', **locals()) helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -3947,7 +3950,7 @@ def sequence_expand(x, y, ref_level=-1, name=None): ...@@ -3947,7 +3950,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
dtype='float32', lod_level=1) dtype='float32', lod_level=1)
out = layers.sequence_expand(x=x, y=y, ref_level=0) out = layers.sequence_expand(x=x, y=y, ref_level=0)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_expand', input=x, **locals()) helper = LayerHelper('sequence_expand', input=x, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -4015,7 +4018,7 @@ def sequence_expand_as(x, y, name=None): ...@@ -4015,7 +4018,7 @@ def sequence_expand_as(x, y, name=None):
dtype='float32', lod_level=1) dtype='float32', lod_level=1)
out = layers.sequence_expand_as(x=x, y=y) out = layers.sequence_expand_as(x=x, y=y)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_expand_as', input=x, **locals()) helper = LayerHelper('sequence_expand_as', input=x, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -4063,7 +4066,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None): ...@@ -4063,7 +4066,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
out = fluid.layers.sequence_pad(x=x, pad_value=pad_value) out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_pad', input=x, **locals()) helper = LayerHelper('sequence_pad', input=x, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -4131,7 +4134,7 @@ def sequence_unpad(x, length, name=None): ...@@ -4131,7 +4134,7 @@ def sequence_unpad(x, length, name=None):
out = fluid.layers.sequence_unpad(x=x, length=len) out = fluid.layers.sequence_unpad(x=x, length=len)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_unpad', input=x, **locals()) helper = LayerHelper('sequence_unpad', input=x, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -4739,6 +4742,106 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): ...@@ -4739,6 +4742,106 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
return out return out
def reduce_all(input, dim=None, keep_dim=False, name=None):
"""
Computes the ``logical and`` of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimension along which the logical and is computed.
If :attr:`None`, compute the logical and over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a bool Tensor variable with following elements:
# [[True, False]
# [True, True]]
# Each example is followed by the corresponding output tensor.
fluid.layers.reduce_all(x) # False
fluid.layers.reduce_all(x, dim=0) # [True, False]
fluid.layers.reduce_all(x, dim=-1) # [False, True]
fluid.layers.reduce_all(x, dim=1,
keep_dim=True) # [[False], [True]]
"""
helper = LayerHelper('reduce_all', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_all',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim is not None else [0],
'keep_dim': keep_dim,
'reduce_all': dim is None
})
return out
def reduce_any(input, dim=None, keep_dim=False, name=None):
"""
Computes the ``logical or`` of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimension along which the logical or is computed.
If :attr:`None`, compute the logical or over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a bool Tensor variable with following elements:
# [[True, False]
# [False, False]]
# Each example is followed by the corresponding output tensor.
fluid.layers.reduce_any(x) # True
fluid.layers.reduce_any(x, dim=0) # [True, False]
fluid.layers.reduce_any(x, dim=-1) # [True, False]
fluid.layers.reduce_any(x, dim=1,
keep_dim=True) # [[True], [False]]
"""
helper = LayerHelper('reduce_any', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_any',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim is not None else [0],
'keep_dim': keep_dim,
'reduce_all': dim is None
})
return out
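The semantics of the two new reductions mirror numpy's `all`/`any` over the same axes; a quick cross-check using the docstring values (a sketch, not part of the patch):

import numpy as np

x = np.array([[True, False], [False, False]])
print(x.any())         # True          ~ fluid.layers.reduce_any(x)
print(x.any(axis=0))   # [ True False] ~ reduce_any(x, dim=0)
print(x.all(axis=-1))  # [False False] ~ reduce_all(x, dim=-1)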
def split(input, num_or_sections, dim=-1, name=None): def split(input, num_or_sections, dim=-1, name=None):
""" """
Split the input tensor into multiple sub-tensors. Split the input tensor into multiple sub-tensors.
...@@ -4820,7 +4923,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): ...@@ -4820,7 +4923,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
the dimension to normalization is rank(X) + axis. -1 is the the dimension to normalization is rank(X) + axis. -1 is the
last dimension. last dimension.
epsilon(float): The epsilon value is used to avoid division by zero, \ epsilon(float): The epsilon value is used to avoid division by zero, \
the default value is 1e-10. the default value is 1e-12.
name(str|None): A name for this layer(optional). If set None, the layer \ name(str|None): A name for this layer(optional). If set None, the layer \
will be named automatically. will be named automatically.
...@@ -5306,7 +5409,7 @@ def sequence_reshape(input, new_dim): ...@@ -5306,7 +5409,7 @@ def sequence_reshape(input, new_dim):
x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1) x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1)
x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10) x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_reshape', **locals()) helper = LayerHelper('sequence_reshape', **locals())
out = helper.create_variable_for_type_inference(helper.input_dtype()) out = helper.create_variable_for_type_inference(helper.input_dtype())
...@@ -5842,7 +5945,7 @@ def im2sequence(input, ...@@ -5842,7 +5945,7 @@ def im2sequence(input,
input=layer, stride=[1, 1], filter_size=[2, 2]) input=layer, stride=[1, 1], filter_size=[2, 2])
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
if isinstance(filter_size, int): if isinstance(filter_size, int):
...@@ -6486,7 +6589,7 @@ def squeeze(input, axes, name=None): ...@@ -6486,7 +6589,7 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10]) x = layers.data(name='x', shape=[5, 1, 10])
y = layers.squeeze(input=x, axes=[1]) y = layers.squeeze(input=x, axes=[1])
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"squeeze layer is not supported in dygraph mode yet.") "squeeze layer is not supported in dygraph mode yet.")
helper = LayerHelper("squeeze", **locals()) helper = LayerHelper("squeeze", **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype) out = helper.create_variable_for_type_inference(dtype=input.dtype)
...@@ -7139,10 +7242,10 @@ def image_resize(input, ...@@ -7139,10 +7242,10 @@ def image_resize(input,
out_shape(list|tuple|Variable|None): Output shape of image resize out_shape(list|tuple|Variable|None): Output shape of image resize
layer, the shape is (out_h, out_w). layer, the shape is (out_h, out_w).
Default: None Default: None
scale(float|None): The multiplier for the input height or width. scale(float|None): The multiplier for the input height or width. At
At least one of out_shape or scale must be set. least one of :attr:`out_shape` or :attr:`scale` must be set.
And out_shape has a higher priority than scale. And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None Default: None.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
resample(str): The resample method. It supports 'BILINEAR' and 'NEAREST' resample(str): The resample method. It supports 'BILINEAR' and 'NEAREST'
...@@ -7180,6 +7283,7 @@ def image_resize(input, ...@@ -7180,6 +7283,7 @@ def image_resize(input,
or 'NEAREST' currently. or 'NEAREST' currently.
ValueError: One of out_shape and scale must not be None. ValueError: One of out_shape and scale must not be None.
ValueError: out_shape length should be 2. ValueError: out_shape length should be 2.
ValueError: scale should be greater than zero.
TypeError: align_corners should be a bool value TypeError: align_corners should be a bool value
ValueError: align_mode can only be '0' or '1' ValueError: align_mode can only be '0' or '1'
...@@ -7211,26 +7315,36 @@ def image_resize(input, ...@@ -7211,26 +7315,36 @@ def image_resize(input,
def _is_list_or_turple_(data): def _is_list_or_turple_(data):
return (isinstance(data, list) or isinstance(data, tuple)) return (isinstance(data, list) or isinstance(data, tuple))
out_h = 0
out_w = 0
inputs = {"X": input} inputs = {"X": input}
attrs = {
"out_h": 0,
"out_w": 0,
"interp_method": resample_type,
"align_corners": align_corners,
"align_mode": align_mode
}
if out_shape is not None: if out_shape is not None:
if isinstance(out_shape, Variable): if isinstance(out_shape, Variable):
warnings.warn("out_shape as Variable type is deprecated, \ warnings.warn("out_shape as Variable type is deprecated, \
it is recommended to use actual_shape instead of \ it is recommended to use actual_shape instead of \
out_shape to specify output shape dynamically.") out_shape to specify output shape dynamically.")
inputs['OutSize'] = out_shape inputs['OutSize'] = out_shape
elif not (_is_list_or_turple_(out_shape)): else:
raise TypeError("out_shape should be a list or tuple or Variable.") if not (_is_list_or_turple_(out_shape)):
elif len(out_shape) != 2: raise TypeError(
raise ValueError("out_shape length should be 2.") "out_shape should be a list or tuple or Variable.")
if len(out_shape) != 2:
out_shape = list(map(int, out_shape)) raise ValueError("out_shape length should be 2.")
out_h = out_shape[0]
out_w = out_shape[1] out_shape = list(map(int, out_shape))
attrs['out_h'] = out_shape[0]
attrs['out_w'] = out_shape[1]
else: else:
out_h = int(input.shape[2] * scale) if scale <= 0:
out_w = int(input.shape[3] * scale) raise ValueError("scale should be greater than zero.")
attrs['scale'] = float(scale)
if isinstance(actual_shape, Variable): if isinstance(actual_shape, Variable):
inputs["OutSize"] = actual_shape inputs["OutSize"] = actual_shape
...@@ -7242,13 +7356,7 @@ def image_resize(input, ...@@ -7242,13 +7356,7 @@ def image_resize(input,
type='{}_interp'.format(resample_type), type='{}_interp'.format(resample_type),
inputs=inputs, inputs=inputs,
outputs={"Out": out}, outputs={"Out": out},
attrs={ attrs=attrs)
"out_h": out_h,
"out_w": out_w,
"interp_method": resample_type,
"align_corners": align_corners,
"align_mode": align_mode
})
return out return out
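For reference, how the reworked out_shape/scale handling looks from user code (a sketch; the tensor names are assumptions, not part of this commit):

import paddle.fluid as fluid

img = fluid.layers.data(name="img", shape=[3, 32, 32], dtype="float32")
# out_shape takes priority when both are given.
by_shape = fluid.layers.image_resize(img, out_shape=[64, 64], resample='BILINEAR')
# With only scale set, out_h/out_w are now computed inside the operator
# from the float 'scale' attribute instead of being precomputed in Python.
by_scale = fluid.layers.image_resize(img, scale=2.0, resample='NEAREST')
# scale <= 0 now raises ValueError("scale should be greater than zero.")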
...@@ -7316,11 +7424,14 @@ def resize_bilinear(input, ...@@ -7316,11 +7424,14 @@ def resize_bilinear(input,
Args: Args:
input(${x_type}): ${x_comment}. input(${x_type}): ${x_comment}.
out_shape(${out_size_type}): ${out_size_comment}. out_shape(list|tuple|Variable|None): Output shape of resize bilinear
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width. At scale(float|None): The multiplier for the input height or width. At
least one of out_shape or scale must be set. And out_shape has least one of :attr:`out_shape` or :attr:`scale` must be set.
a higher priority than scale. Default: None. And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None.
name(str|None): The output variable name. name(str|None): The output variable name.
actual_shape(Variable): An optional input to specify output shape actual_shape(Variable): An optional input to specify output shape
...@@ -7407,11 +7518,14 @@ def resize_nearest(input, ...@@ -7407,11 +7518,14 @@ def resize_nearest(input,
Args: Args:
input(${x_type}): ${x_comment}. input(${x_type}): ${x_comment}.
out_shape(${out_size_type}): ${out_size_comment}. out_shape(list|tuple|Variable|None): Output shape of resize nearest
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width. At scale(float|None): The multiplier for the input height or width. At
least one of out_shape or scale must be set. And out_shape has least one of :attr:`out_shape` or :attr:`scale` must be set.
a higher priority than scale. Default: None. And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None.
name(str|None): The output variable name. name(str|None): The output variable name.
actual_shape(Variable): An optional input to specify output shape actual_shape(Variable): An optional input to specify output shape
...@@ -7621,7 +7735,7 @@ def sequence_scatter(input, index, updates, name=None): ...@@ -7621,7 +7735,7 @@ def sequence_scatter(input, index, updates, name=None):
output = fluid.layers.sequence_scatter(input, index, updates) output = fluid.layers.sequence_scatter(input, index, updates)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_scatter', **locals()) helper = LayerHelper('sequence_scatter', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -8711,7 +8825,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None): ...@@ -8711,7 +8825,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
x = fluid.layers.data(shape[30, 1], dtype='int32', lod_level=1) x = fluid.layers.data(shape[30, 1], dtype='int32', lod_level=1)
out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0) out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0)
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_enumerate', **locals()) helper = LayerHelper('sequence_enumerate', **locals())
out = helper.create_variable_for_type_inference( out = helper.create_variable_for_type_inference(
...@@ -8752,7 +8866,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None): ...@@ -8752,7 +8866,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
Variable: The output sequence mask. Variable: The output sequence mask.
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_mask', **locals()) helper = LayerHelper('sequence_mask', **locals())
...@@ -9227,11 +9341,37 @@ def shape(input): ...@@ -9227,11 +9341,37 @@ def shape(input):
return out return out
def rank(input):
"""
**Rank Layer**
Returns the number of dimensions of a tensor, as a 0-D int32 Tensor.
Args:
input (Variable): The input variable.
Returns:
Variable: The rank of the input variable.
Examples:
.. code-block:: python
input = layers.data(
name="input", shape=[3, 100, 100], dtype="float32")
rank = layers.rank(input) # 4
"""
ndims = len(input.shape)
out = assign(np.array(ndims, 'int32'))
return out
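A self-contained version of the docstring example (imports added; note that fluid.layers.data prepends a batch dimension, which is why the rank comes out as 4):

import paddle.fluid as fluid

input = fluid.layers.data(name="input", shape=[3, 100, 100], dtype="float32")
# Static shape is [-1, 3, 100, 100] after the implicit batch dimension,
# so rank evaluates to 4.
rank = fluid.layers.rank(input)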
def _elementwise_op(helper): def _elementwise_op(helper):
op_type = helper.layer_type op_type = helper.layer_type
x = helper.kwargs.get('x', None) x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None) y = helper.kwargs.get('y', None)
if _in_dygraph_mode(): if in_dygraph_mode():
x = base.to_variable(x) x = base.to_variable(x)
y = base.to_variable(y) y = base.to_variable(y)
...@@ -9804,7 +9944,7 @@ def sequence_reverse(x, name=None): ...@@ -9804,7 +9944,7 @@ def sequence_reverse(x, name=None):
Returns: Returns:
out(${y_type}): ${y_comment} out(${y_type}): ${y_comment}
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.") "sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper("sequence_reverse", **locals()) helper = LayerHelper("sequence_reverse", **locals())
if name is None: if name is None:
...@@ -10992,7 +11132,7 @@ def pixel_shuffle(x, upscale_factor): ...@@ -10992,7 +11132,7 @@ def pixel_shuffle(x, upscale_factor):
Returns: Returns:
Out(Variable): the pixel shuffle result is a tensor variable with the same shape and the same type as the input. Out(Variable): Reshaped tensor according to the new dimension.
Raises: Raises:
......
...@@ -24,26 +24,11 @@ from .layer_function_generator import templatedoc ...@@ -24,26 +24,11 @@ from .layer_function_generator import templatedoc
import numpy import numpy
__all__ = [ __all__ = [
'create_tensor', 'create_tensor', 'create_parameter', 'create_global_var', 'cast',
'create_parameter', 'tensor_array_to_tensor', 'concat', 'sums', 'assign',
'create_global_var', 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax',
'cast', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite',
'tensor_array_to_tensor', 'range', 'linspace'
'concat',
'sums',
'assign',
'fill_constant_batch_size_like',
'fill_constant',
'argmin',
'argmax',
'argsort',
'ones',
'zeros',
'reverse',
'has_inf',
'has_nan',
'isfinite',
'range',
] ]
...@@ -826,3 +811,45 @@ def range(start, end, step, dtype): ...@@ -826,3 +811,45 @@ def range(start, end, step, dtype):
'Step': step}, 'Step': step},
outputs={'Out': [out]}) outputs={'Out': [out]})
return out return out
def linspace(start, stop, num, dtype):
"""
Returns a fixed number of evenly spaced values within a given interval.
The first entry is start and the last entry is stop. When num is 1, only start is returned. This mirrors numpy's linspace function.
Args:
start(float|Variable): First entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'.
stop(float|Variable): Last entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'.
num(int|Variable): Number of entries in the sequence. It is an int scalar, or a tensor of shape [1] with type int32.
dtype(string): 'float32'|'float64', the data type of the output tensor.
Returns:
Variable: The tensor variable storing a 1-D tensor.
Examples:
.. code-block:: python
data = fluid.layers.linspace(0, 10, 5, 'float32') # [0.0, 2.5, 5.0, 7.5, 10.0]
data = fluid.layers.linspace(0, 10, 1, 'float32') # [0.0]
"""
helper = LayerHelper("linspace", **locals())
if not isinstance(start, Variable):
start = fill_constant([1], dtype, start)
if not isinstance(stop, Variable):
stop = fill_constant([1], dtype, stop)
if not isinstance(num, Variable):
num = fill_constant([1], 'int32', num)
out = helper.create_variable_for_type_inference(dtype=start.dtype)
helper.append_op(
type='linspace',
inputs={'Start': start,
'Stop': stop,
'Num': num},
outputs={'Out': [out]})
return out
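A quick cross-check of the new layer against numpy (assumed CPU executor setup, not part of this commit):

import numpy as np
import paddle.fluid as fluid

out = fluid.layers.linspace(0, 10, 5, 'float32')
exe = fluid.Executor(fluid.CPUPlace())
result = exe.run(fluid.default_main_program(), fetch_list=[out])[0]
assert np.allclose(result, np.linspace(0, 10, 5))  # [0., 2.5, 5., 7.5, 10.]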
...@@ -55,7 +55,7 @@ class Optimizer(object): ...@@ -55,7 +55,7 @@ class Optimizer(object):
""" """
def __init__(self, learning_rate, regularization=None, name=None): def __init__(self, learning_rate, regularization=None, name=None):
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
if not isinstance(learning_rate, float) and \ if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, LearningRateDecay): not isinstance(learning_rate, LearningRateDecay):
raise TypeError( raise TypeError(
...@@ -205,7 +205,7 @@ class Optimizer(object): ...@@ -205,7 +205,7 @@ class Optimizer(object):
name = self._name + "_" + name name = self._name + "_" + name
if (name in self._accumulators and if (name in self._accumulators and
param.name in self._accumulators[name]): param.name in self._accumulators[name]):
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
return self._accumulators[name][param.name] return self._accumulators[name][param.name]
raise Exception("Accumulator {} already exists for parameter {}". raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name)) format(name, param.name))
...@@ -363,7 +363,7 @@ class Optimizer(object): ...@@ -363,7 +363,7 @@ class Optimizer(object):
See examples in `apply_gradients`. See examples in `apply_gradients`.
""" """
self._dtype = loss.dtype self._dtype = loss.dtype
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
if parameter_list is not None: if parameter_list is not None:
parameters = parameter_list parameters = parameter_list
else: else:
...@@ -448,7 +448,7 @@ class Optimizer(object): ...@@ -448,7 +448,7 @@ class Optimizer(object):
Returns: Returns:
list: A list of operators appended to the current program. list: A list of operators appended to the current program.
""" """
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
with program_guard(framework.default_main_program(), with program_guard(framework.default_main_program(),
framework.default_startup_program()): framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads) optimize_ops = self._create_optimization_pass(params_grads)
...@@ -628,16 +628,16 @@ class DGCMomentumOptimizer(MomentumOptimizer): ...@@ -628,16 +628,16 @@ class DGCMomentumOptimizer(MomentumOptimizer):
Original paper is https://arxiv.org/abs/1712.01887 Original paper is https://arxiv.org/abs/1712.01887
DGC reduce the communication bandwidth by sending only the important gradients (sparse update):\ DGC reduces the communication bandwidth by sending only the important gradients (sparse update):\
only gradients larger than a threshold are transmitted. only gradients larger than a threshold are transmitted.
To avoid losing information, DGC accumulate the rest of the gradients locally. To avoid losing information, DGC accumulates the rest of the gradients locally.
Eventually, these gradients become large enough to be transmitted. Eventually, these gradients become large enough to be transmitted.
Thus, DGC send the large gradients immediately but eventually send all of the gradients over time. Thus, DGC sends the large gradients immediately but eventually sends all of the gradients over time.
To ensure no loss of accuracy, DGC employs momentum correc-tionandlocal gradient clipping on top of the gradient sparsification to maintain model performance. To ensure no loss of accuracy, DGC employs momentum correction and local gradient clipping on top of the gradient sparsification to maintain model performance.
DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication. DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication.
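The transmit-or-accumulate rule described above can be sketched in a few lines of numpy (an illustration of top-k gradient sparsification with local accumulation, not the operator's actual implementation; all names here are hypothetical):

import numpy as np

def dgc_step(grad, residual, sparsity):
    # Accumulate this step's gradient into the locally retained residual.
    acc = residual + grad
    # Keep only the top (1 - sparsity) fraction of elements by magnitude.
    k = max(1, int(acc.size * (1.0 - sparsity)))
    threshold = np.sort(np.abs(acc).ravel())[-k]
    mask = np.abs(acc) >= threshold
    transmitted = acc * mask        # large gradients, sent immediately
    residual = acc * ~mask          # small gradients, kept for later steps
    return transmitted, residual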
...@@ -652,7 +652,7 @@ class DGCMomentumOptimizer(MomentumOptimizer): ...@@ -652,7 +652,7 @@ class DGCMomentumOptimizer(MomentumOptimizer):
learning_rate (float|Variable): the learning rate used to update parameters. \ learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element. Can be a float value or a Variable with one float value as data element.
momentum (float): Momentum factor. momentum (float): Momentum factor.
rampup_begin_step (int): The begining step from which gradient compression is implemented. rampup_begin_step (int): The beginning step from which gradient compression is implemented.
rampup_step (int): The number of steps over which the sparsity schedule ramps up. Default is 1. rampup_step (int): The number of steps over which the sparsity schedule ramps up. Default is 1.
for example: If the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 5, \ for example: If the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 5, \
it will use 0.75 at step 0, 0.9375 at step 1, and so on. When the end of the sparsity array is reached, \ it will use 0.75 at step 0, 0.9375 at step 1, and so on. When the end of the sparsity array is reached, \
...@@ -660,9 +660,9 @@ class DGCMomentumOptimizer(MomentumOptimizer): ...@@ -660,9 +660,9 @@ class DGCMomentumOptimizer(MomentumOptimizer):
sparsity (list[float]): The sparsity schedule; only the top (1 - current sparsity) fraction of gradient elements, ranked by magnitude, is transmitted. sparsity (list[float]): The sparsity schedule; only the top (1 - current sparsity) fraction of gradient elements, ranked by magnitude, is transmitted.
use_nesterov (bool): Enables Nesterov momentum when True. use_nesterov (bool): Enables Nesterov momentum when True.
local_grad_clip_norm (float): Clip norm value if needed. local_grad_clip_norm (float): Clip norm value if needed.
num_trainers: The number of training node. num_trainers: The number of training nodes.
regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer. regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix. name: An optional name prefix.
Examples: Examples:
.. code-block:: python .. code-block:: python
......
...@@ -6,4 +6,6 @@ foreach(src ${TEST_OPS}) ...@@ -6,4 +6,6 @@ foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py) py_test(${src} SRCS ${src}.py)
endforeach() endforeach()
add_subdirectory(high-level-api) if(WITH_HIGH_LEVEL_API_TEST)
add_subdirectory(high-level-api)
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*_new_api.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test # This test is buggy
foreach(src ${TEST_OPS}) # py_test(test_understand_sentiment_dynamic_rnn SRCS
py_test(${src} SRCS ${src}.py) # test_understand_sentiment_dynamic_rnn.py SERIAL)
endforeach() LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn_new_api)
add_subdirectory(fit_a_line) if(NOT APPLE)
add_subdirectory(recognize_digits) # default test
add_subdirectory(image_classification) foreach(src ${TEST_OPS})
add_subdirectory(understand_sentiment) py_test(${src} SRCS ${src}.py)
add_subdirectory(label_semantic_roles) endforeach()
add_subdirectory(word2vec) else()
add_subdirectory(recommender_system) foreach(src ${TEST_OPS})
add_subdirectory(machine_translation) if(${src} STREQUAL "test_image_classification_vgg_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_image_classification_resnet_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_conv_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_mlp_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
if(NOT APPLE)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_image_classification_vgg")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_image_classification_resnet")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
if(NOT APPLE)
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_recognize_digits_conv")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_mlp")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
else()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# This test is buggy
# py_test(test_understand_sentiment_dynamic_rnn SRCS
# test_understand_sentiment_dynamic_rnn.py SERIAL)
LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
...@@ -74,7 +74,6 @@ list(REMOVE_ITEM TEST_OPS test_dgc_op) ...@@ -74,7 +74,6 @@ list(REMOVE_ITEM TEST_OPS test_dgc_op)
list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl)
list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_dist_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
...@@ -125,10 +124,6 @@ if(NOT WIN32) ...@@ -125,10 +124,6 @@ if(NOT WIN32)
py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL) py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL)
endif() endif()
if(NOT APPLE)
py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# change the timeout from 600 to 2200, because in debug mode, this test needs more time. # change the timeout from 600 to 2200, because in debug mode, this test needs more time.
set_tests_properties(test_parallel_executor_seresnext PROPERTIES TIMEOUT 2200) set_tests_properties(test_parallel_executor_seresnext PROPERTIES TIMEOUT 2200)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import six
def fake_imdb_reader(word_dict_size,
sample_num,
lower_seq_len=100,
upper_seq_len=200,
class_dim=2):
def __reader__():
for _ in six.moves.range(sample_num):
length = np.random.random_integers(
low=lower_seq_len, high=upper_seq_len, size=[1])[0]
ids = np.random.random_integers(
low=0, high=word_dict_size - 1, size=[length]).astype('int64')
label = np.random.random_integers(
low=0, high=class_dim - 1, size=[1]).astype('int64')[0]
yield ids, label
return __reader__
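A usage sketch for the new fake reader (mirroring how the tests below consume it; the batch size is an assumption):

import paddle

reader = fake_imdb_reader(word_dict_size=5147, sample_num=32 * 40)
train_reader = paddle.batch(reader, batch_size=32)
for batch in train_reader():
    ids, label = batch[0]   # first sample: (int64 id sequence, int64 label)
    break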
...@@ -29,7 +29,8 @@ __all__ = ['TestParallelExecutorBase'] ...@@ -29,7 +29,8 @@ __all__ = ['TestParallelExecutorBase']
class TestParallelExecutorBase(unittest.TestCase): class TestParallelExecutorBase(unittest.TestCase):
def check_network_convergence(self, @classmethod
def check_network_convergence(cls,
method, method,
use_cuda=True, use_cuda=True,
memory_opt=True, memory_opt=True,
......
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
class L1(fluid.dygraph.Layer): class L1(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L1, self).__init__(prefix) super(L1, self).__init__(prefix)
self._param_attr = fluid.ParamAttr( self._param_attr = fluid.ParamAttr(
...@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer): ...@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer):
return self.w1 + self.w2 return self.w1 + self.w2
class L2(fluid.dygraph.Layer): class L2(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L2, self).__init__(prefix) super(L2, self).__init__(prefix)
self.layer1 = L1(self.full_name()) self.layer1 = L1(self.full_name())
...@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer): ...@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer):
return self.layer1() + self.layer2() return self.layer1() + self.layer2()
class L3(fluid.dygraph.Layer): class L3(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L3, self).__init__(prefix) super(L3, self).__init__(prefix)
self.layer1 = L2(self.full_name()) self.layer1 = L2(self.full_name())
...@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase): ...@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase):
ret = l() ret = l()
self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1") self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) self.assertTrue(np.allclose(ret.numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self): def test_three_level(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
...@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase): ...@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase):
self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1") self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0") self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1") self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2]))) self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2])))
if __name__ == '__main__': if __name__ == '__main__':
......
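The API renames exercised in the test above — Variable._numpy()/_backward()/_gradient() losing their leading underscores, and fluid.dygraph.Layer re-exported as fluid.Layer — in one minimal sketch (illustrative only):

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.base.to_variable(np.ones([2, 2], dtype='float32'))
    y = fluid.layers.reduce_sum(x)
    y.backward()          # previously y._backward()
    print(y.numpy())      # previously y._numpy()
    print(x.gradient())   # previously x._gradient()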
...@@ -91,17 +91,26 @@ class TestBilinearInterpOp(OpTest): ...@@ -91,17 +91,26 @@ class TestBilinearInterpOp(OpTest):
self.op_type = "bilinear_interp" self.op_type = "bilinear_interp"
input_np = np.random.random(self.input_shape).astype("float32") input_np = np.random.random(self.input_shape).astype("float32")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, if self.scale > 0:
self.out_size, self.actual_shape, out_h = int(self.input_shape[2] * self.scale)
self.align_corners, self.align_mode) out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size,
self.actual_shape, self.align_corners,
self.align_mode)
self.inputs = {'X': input_np} self.inputs = {'X': input_np}
if self.out_size is not None: if self.out_size is not None:
self.inputs['OutSize'] = self.out_size self.inputs['OutSize'] = self.out_size
if self.actual_shape is not None: if self.actual_shape is not None:
self.inputs['OutSize'] = self.actual_shape self.inputs['OutSize'] = self.actual_shape
self.attrs = { self.attrs = {
'out_h': self.out_h, 'out_h': self.out_h,
'out_w': self.out_w, 'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method, 'interp_method': self.interp_method,
'align_corners': self.align_corners, 'align_corners': self.align_corners,
'align_mode': self.align_mode 'align_mode': self.align_mode
...@@ -119,6 +128,7 @@ class TestBilinearInterpOp(OpTest): ...@@ -119,6 +128,7 @@ class TestBilinearInterpOp(OpTest):
self.input_shape = [2, 3, 4, 4] self.input_shape = [2, 3, 4, 4]
self.out_h = 2 self.out_h = 2
self.out_w = 2 self.out_w = 2
self.scale = 0.
self.out_size = np.array([3, 3]).astype("int32") self.out_size = np.array([3, 3]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -130,6 +140,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): ...@@ -130,6 +140,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 1 self.out_h = 1
self.out_w = 1 self.out_w = 1
self.scale = 0.
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -140,6 +151,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): ...@@ -140,6 +151,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp):
self.input_shape = [3, 3, 9, 6] self.input_shape = [3, 3, 9, 6]
self.out_h = 12 self.out_h = 12
self.out_w = 12 self.out_w = 12
self.scale = 0.
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -150,6 +162,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): ...@@ -150,6 +162,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp):
self.input_shape = [1, 1, 128, 64] self.input_shape = [1, 1, 128, 64]
self.out_h = 64 self.out_h = 64
self.out_w = 128 self.out_w = 128
self.scale = 0.
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -160,6 +173,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): ...@@ -160,6 +173,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 1 self.out_h = 1
self.out_w = 1 self.out_w = 1
self.scale = 0.
self.out_size = np.array([2, 2]).astype("int32") self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -171,6 +185,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): ...@@ -171,6 +185,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp):
self.input_shape = [3, 3, 9, 6] self.input_shape = [3, 3, 9, 6]
self.out_h = 12 self.out_h = 12
self.out_w = 12 self.out_w = 12
self.scale = 0.
self.out_size = np.array([11, 11]).astype("int32") self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -182,6 +197,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): ...@@ -182,6 +197,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp):
self.input_shape = [1, 1, 128, 64] self.input_shape = [1, 1, 128, 64]
self.out_h = 64 self.out_h = 64
self.out_w = 128 self.out_w = 128
self.scale = 0.
self.out_size = np.array([65, 129]).astype("int32") self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -193,6 +209,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): ...@@ -193,6 +209,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp):
self.input_shape = [3, 2, 32, 16] self.input_shape = [3, 2, 32, 16]
self.out_h = 64 self.out_h = 64
self.out_w = 32 self.out_w = 32
self.scale = 0.
self.out_size = np.array([66, 40]).astype("int32") self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -206,15 +223,25 @@ class TestBilinearInterpOpUint8(OpTest): ...@@ -206,15 +223,25 @@ class TestBilinearInterpOpUint8(OpTest):
self.op_type = "bilinear_interp" self.op_type = "bilinear_interp"
input_np = np.random.randint( input_np = np.random.randint(
low=0, high=256, size=self.input_shape).astype("uint8") low=0, high=256, size=self.input_shape).astype("uint8")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape, if self.scale > 0:
self.align_corners, self.align_mode) out_h = int(self.input_shape[2] * self.scale)
out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size,
self.actual_shape, self.align_corners,
self.align_mode)
self.inputs = {'X': input_np} self.inputs = {'X': input_np}
if self.out_size is not None: if self.out_size is not None:
self.inputs['OutSize'] = self.out_size self.inputs['OutSize'] = self.out_size
self.attrs = { self.attrs = {
'out_h': self.out_h, 'out_h': self.out_h,
'out_w': self.out_w, 'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method, 'interp_method': self.interp_method,
'align_corners': self.align_corners, 'align_corners': self.align_corners,
'align_mode': self.align_mode 'align_mode': self.align_mode
...@@ -229,6 +256,7 @@ class TestBilinearInterpOpUint8(OpTest): ...@@ -229,6 +256,7 @@ class TestBilinearInterpOpUint8(OpTest):
self.input_shape = [1, 3, 9, 6] self.input_shape = [1, 3, 9, 6]
self.out_h = 10 self.out_h = 10
self.out_w = 9 self.out_w = 9
self.scale = 0.
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -239,6 +267,7 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): ...@@ -239,6 +267,7 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
self.input_shape = [2, 3, 128, 64] self.input_shape = [2, 3, 128, 64]
self.out_h = 120 self.out_h = 120
self.out_w = 50 self.out_w = 50
self.scale = 0.
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -249,6 +278,7 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): ...@@ -249,6 +278,7 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 5 self.out_h = 5
self.out_w = 13 self.out_w = 13
self.scale = 0.
self.out_size = np.array([6, 15]).astype("int32") self.out_size = np.array([6, 15]).astype("int32")
self.align_corners = True self.align_corners = True
self.align_mode = 1 self.align_mode = 1
...@@ -272,5 +302,38 @@ class TestBilinearInterpWithMethod3(TestBilinearInterpOp): ...@@ -272,5 +302,38 @@ class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
self.align_mode = 0 self.align_mode = 0
class TestBilinearInterpScale1(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 2.
self.align_corners = True
self.align_mode = 1
class TestBilinearInterpScale2(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 1.
self.align_corners = True
self.align_mode = 1
class TestBilinearInterpScale3(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 1.5
self.align_corners = True
self.align_mode = 1
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
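Expected spatial sizes for the new scale cases above, since a positive scale overrides out_h/out_w (derived from the setUp logic, noted for the reader's convenience):

#   TestBilinearInterpScale1: (int(16 * 2.0), int(32 * 2.0)) -> 32 x 64
#   TestBilinearInterpScale2: (int(16 * 1.0), int(32 * 1.0)) -> 16 x 32
#   TestBilinearInterpScale3: (int(16 * 1.5), int(32 * 1.5)) -> 24 x 48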
...@@ -19,7 +19,7 @@ import time ...@@ -19,7 +19,7 @@ import time
import six import six
import unittest import unittest
EPOCH_NUM = 60 EPOCH_NUM = 20
BATCH_SIZE = 32 BATCH_SIZE = 32
CLASS_NUM = 10 CLASS_NUM = 10
......
...@@ -22,6 +22,8 @@ import paddle ...@@ -22,6 +22,8 @@ import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import compiler from paddle.fluid import compiler
import numpy as np
from fake_reader import fake_imdb_reader
def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
...@@ -35,16 +37,16 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): ...@@ -35,16 +37,16 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
) )
return return
word_dict = paddle.dataset.imdb.word_dict() word_dict_size = 5147
train_reader = paddle.batch( reader = fake_imdb_reader(word_dict_size, batch_size * 40)
paddle.dataset.imdb.train(word_dict), batch_size=batch_size) train_reader = paddle.batch(reader, batch_size=batch_size)
data = fluid.layers.data( data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1) name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64") label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = network(data, label, len(word_dict)) cost = network(data, label, word_dict_size)
cost.persistable = True cost.persistable = True
optimizer = fluid.optimizer.Adagrad(learning_rate=0.2) optimizer = fluid.optimizer.Adagrad(learning_rate=0.2)
optimizer.minimize(cost) optimizer.minimize(cost)
......
...@@ -18,20 +18,21 @@ import importlib ...@@ -18,20 +18,21 @@ import importlib
fluid.core._set_eager_deletion_mode(0.0, 1.0, True) fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
from test_bilinear_interp_op import *
from test_concat_op import *
from test_elementwise_add_op import * from test_elementwise_add_op import *
from test_elementwise_sub_op import * from test_elementwise_sub_op import *
from test_concat_op import * from test_fill_constant_batch_size_like_op import *
from test_fill_zeros_like2_op import *
from test_gather_op import * from test_gather_op import *
from test_gaussian_random_batch_size_like_op import * from test_gaussian_random_batch_size_like_op import *
from test_uniform_random_batch_size_like_op import * from test_linear_chain_crf_op import *
from test_fill_constant_batch_size_like_op import *
from test_lod_reset_op import * from test_lod_reset_op import *
from test_scatter_op import * from test_lookup_table_op import *
from test_mean_op import * from test_mean_op import *
from test_slice_op import *
from test_linear_chain_crf_op import *
from test_bilinear_interp_op import *
from test_nearest_interp_op import * from test_nearest_interp_op import *
from test_pad2d_op import *
from test_scatter_op import *
from test_sequence_concat import * from test_sequence_concat import *
from test_seq_conv import * from test_seq_conv import *
from test_seq_pool import * from test_seq_pool import *
...@@ -41,8 +42,10 @@ from test_sequence_pad_op import * ...@@ -41,8 +42,10 @@ from test_sequence_pad_op import *
from test_sequence_unpad_op import * from test_sequence_unpad_op import *
from test_sequence_scatter_op import * from test_sequence_scatter_op import *
from test_sequence_slice_op import * from test_sequence_slice_op import *
from test_pad2d_op import * from test_slice_op import *
from test_fill_zeros_like2_op import * from test_space_to_depth_op import *
from test_squared_l2_distance_op import *
from test_uniform_random_batch_size_like_op import *
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -19,6 +19,8 @@ import numpy as np ...@@ -19,6 +19,8 @@ import numpy as np
import paddle import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid as fluid import paddle.fluid as fluid
import six
from fake_reader import fake_imdb_reader
def bow_net(data, def bow_net(data,
...@@ -48,11 +50,10 @@ def bow_net(data, ...@@ -48,11 +50,10 @@ def bow_net(data,
class TestGradientClip(unittest.TestCase): class TestGradientClip(unittest.TestCase):
def setUp(self): def setUp(self):
self.word_dict = paddle.dataset.imdb.word_dict() self.word_dict_len = 5147
self.BATCH_SIZE = 2 self.BATCH_SIZE = 2
self.train_data = paddle.batch( reader = fake_imdb_reader(self.word_dict_len, self.BATCH_SIZE * 100)
paddle.dataset.imdb.train(self.word_dict), self.train_data = paddle.batch(reader, batch_size=self.BATCH_SIZE)
batch_size=self.BATCH_SIZE)
def get_places(self): def get_places(self):
places = [core.CPUPlace()] places = [core.CPUPlace()]
...@@ -131,7 +132,7 @@ class TestGradientClip(unittest.TestCase): ...@@ -131,7 +132,7 @@ class TestGradientClip(unittest.TestCase):
data = fluid.layers.data( data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1) name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64") label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = bow_net(data, label, len(self.word_dict)) cost = bow_net(data, label, self.word_dict_len)
fluid.clip.set_gradient_clip( fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)) clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
......
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.dygraph.nn import FC from paddle.fluid import FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MyLayer(fluid.dygraph.Layer): class MyLayer(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MyLayer, self).__init__(name_scope) super(MyLayer, self).__init__(name_scope)
...@@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer): ...@@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer):
return [x] return [x]
class MyPyLayer(fluid.dygraph.PyLayer): class MyPyLayer(fluid.PyLayer):
def __init__(self): def __init__(self):
super(MyPyLayer, self).__init__() super(MyPyLayer, self).__init__()
...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer): ...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer):
return np.array(dout) * (1 - np.square(np.array(out))) return np.array(dout) * (1 - np.square(np.array(out)))
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), self._fc1 = FC(self.full_name(),
...@@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer): ...@@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer):
return x return x
class SimpleRNNCell(fluid.dygraph.Layer): class SimpleRNNCell(fluid.Layer):
def __init__(self, name_scope, step_input_size, hidden_size, output_size, def __init__(self, name_scope, step_input_size, hidden_size, output_size,
param_attr): param_attr):
super(SimpleRNNCell, self).__init__(name_scope) super(SimpleRNNCell, self).__init__(name_scope)
...@@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): ...@@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
self._dtype = core.VarDesc.VarType.FP32 self._dtype = core.VarDesc.VarType.FP32
self.param_attr = param_attr self.param_attr = param_attr
def _build_once(self, inputs, pre_hidden): def build_once(self, inputs, pre_hidden):
i2h_param_shape = [self.step_input_size, self.hidden_size] i2h_param_shape = [self.step_input_size, self.hidden_size]
h2h_param_shape = [self.hidden_size, self.hidden_size] h2h_param_shape = [self.hidden_size, self.hidden_size]
h2o_param_shape = [self.output_size, self.hidden_size] h2o_param_shape = [self.output_size, self.hidden_size]
...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): ...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
return reduce_out, hidden return reduce_out, hidden
class SimpleRNN(fluid.dygraph.Layer): class SimpleRNN(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(SimpleRNN, self).__init__(name_scope) super(SimpleRNN, self).__init__(name_scope)
self.seq_len = 4 self.seq_len = 4
...@@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase): ...@@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase):
inputs.append(fluid.dygraph.base.to_variable(x)) inputs.append(fluid.dygraph.base.to_variable(x))
ret = fluid.layers.sums(inputs) ret = fluid.layers.sums(inputs)
loss = fluid.layers.reduce_sum(ret) loss = fluid.layers.reduce_sum(ret)
loss._backward() loss.backward()
self.assertTrue(np.allclose(ret._numpy(), x * 10)) self.assertTrue(np.allclose(ret.numpy(), x * 10))
self.assertTrue(np.allclose(inputs[0]._gradient(), x)) self.assertTrue(np.allclose(inputs[0].gradient(), x))
def test_layer(self): def test_layer(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
cl = core.Layer() cl = core.Layer()
cl.forward([]) cl.forward([])
l = fluid.dygraph.Layer("l") l = fluid.Layer("l")
self.assertRaises(NotImplementedError, l.forward, []) self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self): def test_pylayer_func_id(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
class PyLayer1(fluid.dygraph.PyLayer): class PyLayer1(fluid.PyLayer):
def __init__(self): def __init__(self):
super(PyLayer1, self).__init__() super(PyLayer1, self).__init__()
...@@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase): ...@@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase):
def backward(input): def backward(input):
return input return input
class PyLayer2(fluid.dygraph.PyLayer): class PyLayer2(fluid.PyLayer):
def __init__(self): def __init__(self):
super(PyLayer2, self).__init__() super(PyLayer2, self).__init__()
...@@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase): ...@@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase):
my_py_layer = MyPyLayer() my_py_layer = MyPyLayer()
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
outs = my_py_layer(var_inp) outs = my_py_layer(var_inp)
dy_out = np.sum(outs[0]._numpy()) dy_out = np.sum(outs[0].numpy())
outs[0]._backward() outs[0].backward()
dy_grad = var_inp._gradient() dy_grad = var_inp.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase): ...@@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase):
l = MyLayer("my_layer") l = MyLayer("my_layer")
x = l(var_inp)[0] x = l(var_inp)[0]
self.assertIsNotNone(x) self.assertIsNotNone(x)
dy_out = x._numpy() dy_out = x.numpy()
x._backward() x.backward()
dy_grad = l._x_for_debug._gradient() dy_grad = l._x_for_debug.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase): ...@@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp") mlp = MLP("mlp")
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out._numpy() dy_out = out.numpy()
out._backward() out.backward()
dy_grad = mlp._fc1._w._gradient() dy_grad = mlp._fc1._w.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -358,7 +358,7 @@ class TestImperative(unittest.TestCase): ...@@ -358,7 +358,7 @@ class TestImperative(unittest.TestCase):
x = fluid.layers.elementwise_add(inp1, inp2) x = fluid.layers.elementwise_add(inp1, inp2)
else: else:
x = fluid.layers.elementwise_sub(inp1, inp2) x = fluid.layers.elementwise_sub(inp1, inp2)
dygraph_result = x._numpy() dygraph_result = x.numpy()
# static graph # static graph
with new_program_scope(): with new_program_scope():
...@@ -407,11 +407,11 @@ class TestImperative(unittest.TestCase): ...@@ -407,11 +407,11 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN("simple_rnn") simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn.forward(var_inp) outs, pre_hiddens = simple_rnn.forward(var_inp)
dy_out = outs[3]._numpy() dy_out = outs[3].numpy()
outs[3]._backward() outs[3].backward()
dy_grad_h2o = simple_rnn._cell._h2o_w._gradient() dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
dy_grad_h2h = simple_rnn._cell._h2h_w._gradient() dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
dy_grad_i2h = simple_rnn._cell._i2h_w._gradient() dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
......
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
return x return x
class MNIST(fluid.dygraph.Layer): class MNIST(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
...@@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase): ...@@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
cost = mnist(img) cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
avg_loss._backward() avg_loss.backward()
sgd.minimize(avg_loss) sgd.minimize(avg_loss)
fluid.dygraph.save_persistables(mnist, "save_dir") fluid.dygraph.save_persistables(mnist, "save_dir")
mnist.clear_gradients() mnist.clear_gradients()
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
mnist.load_dict( mnist.load_dict(
fluid.dygraph.load_persistables(mnist, "save_dir")) fluid.dygraph.load_persistables(mnist, "save_dir"))
......
...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5)) ...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5))
NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1))
class DMF(fluid.dygraph.Layer): class DMF(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(DMF, self).__init__(name_scope) super(DMF, self).__init__(name_scope)
self._user_latent = fluid.dygraph.FC(self.full_name(), 256) self._user_latent = fluid.FC(self.full_name(), 256)
self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._item_latent = fluid.FC(self.full_name(), 256)
self._user_layers = [] self._user_layers = []
self._item_layers = [] self._item_layers = []
...@@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer): ...@@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer):
self._user_layers.append( self._user_layers.append(
self.add_sublayer( self.add_sublayer(
'user_layer_%d' % i, 'user_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
self._item_layers.append( self._item_layers.append(
self.add_sublayer( self.add_sublayer(
'item_layer_%d' % i, 'item_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
def forward(self, users, items): def forward(self, users, items):
users = self._user_latent(users) users = self._user_latent(users)
...@@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer): ...@@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer):
return fluid.layers.elementwise_mul(users, items) return fluid.layers.elementwise_mul(users, items)
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._user_latent = fluid.dygraph.FC(self.full_name(), 256) self._user_latent = fluid.FC(self.full_name(), 256)
self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._item_latent = fluid.FC(self.full_name(), 256)
self._match_layers = [] self._match_layers = []
self._hid_sizes = [128, 64] self._hid_sizes = [128, 64]
for i in range(len(self._hid_sizes)): for i in range(len(self._hid_sizes)):
self._match_layers.append( self._match_layers.append(
self.add_sublayer( self.add_sublayer(
'match_layer_%d' % i, 'match_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
def forward(self, users, items): def forward(self, users, items):
...@@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer): ...@@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer):
return match_vec return match_vec
class DeepCF(fluid.dygraph.Layer): class DeepCF(fluid.Layer):
def __init__(self, name_scope, num_users, num_items, matrix): def __init__(self, name_scope, num_users, num_items, matrix):
super(DeepCF, self).__init__(name_scope) super(DeepCF, self).__init__(name_scope)
self._num_users = num_users self._num_users = num_users
...@@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer): ...@@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer):
matrix.dtype, matrix.dtype,
is_bias=False, is_bias=False,
default_initializer=fluid.initializer.NumpyArrayInitializer(matrix)) default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
self._rating_matrix._stop_gradient = True self._rating_matrix.stop_gradient = True
self._mlp = MLP(self.full_name()) self._mlp = MLP(self.full_name())
self._dmf = DMF(self.full_name()) self._dmf = DMF(self.full_name())
self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid') self._match_fc = fluid.FC(self.full_name(), 1, act='sigmoid')
def forward(self, users, items): def forward(self, users, items):
# users_emb = self._user_emb(users) # users_emb = self._user_emb(users)
...@@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase):
fluid.layers.log_loss(prediction, fluid.layers.log_loss(prediction,
to_variable(labels_np[ to_variable(labels_np[
slice:slice + BATCH_SIZE]))) slice:slice + BATCH_SIZE])))
loss._backward() loss.backward()
adam.minimize(loss) adam.minimize(loss)
deepcf.clear_gradients() deepcf.clear_gradients()
dy_loss = loss._numpy() dy_loss = loss.numpy()
sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss)) sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss))
self.assertEqual(static_loss, dy_loss) self.assertEqual(static_loss, dy_loss)
......
...@@ -22,12 +22,12 @@ import paddle ...@@ -22,12 +22,12 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
class Discriminator(fluid.dygraph.Layer): class Discriminator(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Discriminator, self).__init__(name_scope) super(Discriminator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=32, act='elu') self._fc1 = FC(self.full_name(), size=32, act='elu')
...@@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer): ...@@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer):
return self._fc2(x) return self._fc2(x)
class Generator(fluid.dygraph.Layer): class Generator(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Generator, self).__init__(name_scope) super(Generator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=64, act='elu') self._fc1 = FC(self.full_name(), size=64, act='elu')
...@@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase):
x=d_fake, label=to_variable(np.zeros([2, 1], np.float32)))) x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))))
d_loss = d_loss_real + d_loss_fake d_loss = d_loss_real + d_loss_fake
d_loss._backward() d_loss.backward()
sgd.minimize(d_loss) sgd.minimize(d_loss)
discriminator.clear_gradients() discriminator.clear_gradients()
generator.clear_gradients() generator.clear_gradients()
...@@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase): ...@@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase):
g_loss = fluid.layers.reduce_mean( g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits( fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.ones([2, 1], np.float32)))) x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
g_loss._backward() g_loss.backward()
sgd.minimize(g_loss) sgd.minimize(g_loss)
for p in discriminator.parameters(): for p in discriminator.parameters():
dy_params[p.name] = p._numpy() dy_params[p.name] = p.numpy()
for p in generator.parameters(): for p in generator.parameters():
dy_params[p.name] = p._numpy() dy_params[p.name] = p.numpy()
dy_g_loss = g_loss._numpy() dy_g_loss = g_loss.numpy()
dy_d_loss = d_loss._numpy() dy_d_loss = d_loss.numpy()
self.assertEqual(dy_g_loss, static_g_loss) self.assertEqual(dy_g_loss, static_g_loss)
self.assertEqual(dy_d_loss, static_d_loss) self.assertEqual(dy_d_loss, static_d_loss)
......
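The GAN hunk keeps the usual alternating update while switching to the public backward() and numpy() calls. A condensed sketch of that loop; the single-FC d and g below are hypothetical stand-ins for the Discriminator and Generator classes:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

with fluid.dygraph.guard():
    d = fluid.FC("d", size=1)          # stand-in discriminator
    g = fluid.FC("g", size=2)          # stand-in generator
    sgd = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)
    noise = to_variable(np.random.rand(2, 2).astype('float32'))
    real = to_variable(np.random.rand(2, 2).astype('float32'))

    # Discriminator step: push D(real) toward 1 and D(G(z)) toward 0.
    d_loss_real = fluid.layers.reduce_mean(
        fluid.layers.sigmoid_cross_entropy_with_logits(
            x=d(real), label=to_variable(np.ones([2, 1], np.float32))))
    d_loss_fake = fluid.layers.reduce_mean(
        fluid.layers.sigmoid_cross_entropy_with_logits(
            x=d(g(noise)), label=to_variable(np.zeros([2, 1], np.float32))))
    d_loss = d_loss_real + d_loss_fake
    d_loss.backward()
    sgd.minimize(d_loss)
    d.clear_gradients()
    g.clear_gradients()

    # Generator step: push D(G(z)) toward 1.
    g_loss = fluid.layers.reduce_mean(
        fluid.layers.sigmoid_cross_entropy_with_logits(
            x=d(g(noise)), label=to_variable(np.ones([2, 1], np.float32))))
    g_loss.backward()
    sgd.minimize(g_loss)
    g.clear_gradients()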
...@@ -15,14 +15,12 @@ ...@@ -15,14 +15,12 @@
import contextlib import contextlib
import unittest import unittest
import numpy as np import numpy as np
import six
import sys import sys
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
...@@ -31,7 +29,7 @@ def gen_data(): ...@@ -31,7 +29,7 @@ def gen_data():
pass pass
class GraphConv(fluid.dygraph.Layer): class GraphConv(fluid.Layer):
def __init__(self, name_scope, in_features, out_features): def __init__(self, name_scope, in_features, out_features):
super(GraphConv, self).__init__(name_scope) super(GraphConv, self).__init__(name_scope)
...@@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer): ...@@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer):
return fluid.layers.matmul(adj, support) + self.bias return fluid.layers.matmul(adj, support) + self.bias
class GCN(fluid.dygraph.Layer): class GCN(fluid.Layer):
def __init__(self, name_scope, num_hidden): def __init__(self, name_scope, num_hidden):
super(GCN, self).__init__(name_scope) super(GCN, self).__init__(name_scope)
self.gc = GraphConv(self.full_name(), num_hidden, 32) self.gc = GraphConv(self.full_name(), num_hidden, 32)
...@@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase): ...@@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase):
loss = fluid.layers.reduce_sum(loss) loss = fluid.layers.reduce_sum(loss)
adam = AdamOptimizer(learning_rate=1e-3) adam = AdamOptimizer(learning_rate=1e-3)
adam.minimize(loss) adam.minimize(loss)
self.assertEqual(static_loss, loss._numpy()) self.assertEqual(static_loss, loss.numpy())
self.assertTrue( self.assertTrue(np.allclose(static_weight, model.gc.weight.numpy()))
np.allclose(static_weight, model.gc.weight._numpy())) sys.stderr.write('%s %s\n' % (static_loss, loss.numpy()))
sys.stderr.write('%s %s\n' % (static_loss, loss._numpy()))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
cost = mnist(img) cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if epoch == 0 and batch_id == 0: if epoch == 0 and batch_id == 0:
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
sgd.minimize(avg_loss) sgd.minimize(avg_loss)
mnist.clear_gradients() mnist.clear_gradients()
dy_param_value = {} dy_param_value = {}
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable ...@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope, param_attr=None, bias_attr=None): def __init__(self, name_scope, param_attr=None, bias_attr=None):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
...@@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase): ...@@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
cost = mlp(img) cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost) avg_loss = fluid.layers.reduce_mean(cost)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if batch_id == 0: if batch_id == 0:
for param in mlp.parameters(): for param in mlp.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
optimizer.minimize(avg_loss) optimizer.minimize(avg_loss)
mlp.clear_gradients() mlp.clear_gradients()
dy_param_value = {} dy_param_value = {}
for param in mlp.parameters(): for param in mlp.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable ...@@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
import numpy as np import numpy as np
import six import six
from paddle.fluid.backward import append_backward
class SimpleLSTMRNN(fluid.dygraph.Layer): class SimpleLSTMRNN(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
hidden_size, hidden_size,
...@@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): ...@@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
self.cell_array = [] self.cell_array = []
self.hidden_array = [] self.hidden_array = []
def _build_once(self, input_embedding, init_hidden=None, init_cell=None): def build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = [] self.weight_1_arr = []
self.weight_2_arr = [] self.weight_2_arr = []
self.bias_arr = [] self.bias_arr = []
...@@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): ...@@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
return real_res, last_hidden, last_cell return real_res, last_hidden, last_cell
class PtbModel(fluid.dygraph.Layer): class PtbModel(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
hidden_size, hidden_size,
...@@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer): ...@@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer):
default_initializer=fluid.initializer.UniformInitializer( default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale)) low=-self.init_scale, high=self.init_scale))
def _build_once(self, input, label, init_hidden, init_cell): def build_once(self, input, label, init_hidden, init_cell):
pass pass
def forward(self, input, label, init_hidden, init_cell): def forward(self, input, label, init_hidden, init_cell):
...@@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_cell) init_cell)
if i == 0: if i == 0:
for param in ptb_model.parameters(): for param in ptb_model.parameters():
dy_param_init[param.name] = param._numpy() dy_param_init[param.name] = param.numpy()
dy_loss._backward() dy_loss.backward()
sgd.minimize(dy_loss) sgd.minimize(dy_loss)
ptb_model.clear_gradients() ptb_model.clear_gradients()
if i == batch_num - 1: if i == batch_num - 1:
for param in ptb_model.parameters(): for param in ptb_model.parameters():
dy_param_updated[param.name] = param._numpy() dy_param_updated[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
...@@ -334,11 +333,11 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -334,11 +333,11 @@ class TestDygraphPtbRnn(unittest.TestCase):
static_param_updated[static_param_name_list[k - static_param_updated[static_param_name_list[k -
3]] = out[k] 3]] = out[k]
self.assertTrue(np.array_equal(static_loss_value, dy_loss._numpy())) self.assertTrue(np.array_equal(static_loss_value, dy_loss.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_last_cell_value, last_cell._numpy())) np.array_equal(static_last_cell_value, last_cell.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_last_hidden_value, last_hidden._numpy())) np.array_equal(static_last_hidden_value, last_hidden.numpy()))
for key, value in six.iteritems(static_param_init): for key, value in six.iteritems(static_param_init):
self.assertTrue(np.array_equal(value, dy_param_init[key])) self.assertTrue(np.array_equal(value, dy_param_init[key]))
for key, value in six.iteritems(static_param_updated): for key, value in six.iteritems(static_param_updated):
......
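The PtbModel hunks rename the once-only build hook from _build_once to build_once, making lazy parameter construction part of the public Layer contract. A minimal sketch of a layer using that hook; LazyScale is hypothetical, and create_parameter's signature is assumed from the DeepCF rating-matrix hunk above:

import paddle.fluid as fluid

class LazyScale(fluid.Layer):
    def __init__(self, name_scope):
        super(LazyScale, self).__init__(name_scope)

    def build_once(self, x):
        # Called exactly once, right before the first forward(); the input
        # is available here, so the parameter shape can depend on it.
        self._w = self.create_parameter(
            attr=None, shape=[x.shape[-1]], dtype='float32', is_bias=False)

    def forward(self, x):
        return fluid.layers.elementwise_mul(x, self._w)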
...@@ -21,7 +21,7 @@ import paddle ...@@ -21,7 +21,7 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
...@@ -68,7 +68,7 @@ def optimizer_setting(params): ...@@ -68,7 +68,7 @@ def optimizer_setting(params):
return optimizer return optimizer
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
return y return y
class BottleneckBlock(fluid.dygraph.Layer): class BottleneckBlock(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
return layer_helper.append_activation(y) return layer_helper.append_activation(y)
class ResNet(fluid.dygraph.Layer): class ResNet(fluid.Layer):
def __init__(self, name_scope, layers=50, class_dim=102): def __init__(self, name_scope, layers=50, class_dim=102):
super(ResNet, self).__init__(name_scope) super(ResNet, self).__init__(name_scope)
...@@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_init_value = {} dy_param_init_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num: if batch_id >= batch_num:
...@@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
out = resnet(img) out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label) loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss) avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if batch_id == 0: if batch_id == 0:
for param in resnet.parameters(): for param in resnet.parameters():
if param.name not in dy_param_init_value: if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
dy_grad_value = {} dy_grad_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
...@@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_value = {} dy_param_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -333,7 +333,7 @@ class TestImperativeResneXt(unittest.TestCase): ...@@ -333,7 +333,7 @@ class TestImperativeResneXt(unittest.TestCase):
dy_param_init_value = {} dy_param_init_value = {}
for param in se_resnext.parameters(): for param in se_resnext.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
for epoch_id in range(epoch_num): for epoch_id in range(epoch_num):
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
...@@ -349,19 +349,19 @@ class TestImperativeResneXt(unittest.TestCase): ...@@ -349,19 +349,19 @@ class TestImperativeResneXt(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
out = se_resnext(img) out = se_resnext(img)
loss = fluid.layers.cross_entropy(input=out, label=label) loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss) avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if batch_id == 0: if batch_id == 0:
for param in se_resnext.parameters(): for param in se_resnext.parameters():
if param.name not in dy_param_init_value: if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
#dy_grad_value = {} #dy_grad_value = {}
#for param in se_resnext.parameters(): #for param in se_resnext.parameters():
...@@ -375,7 +375,7 @@ class TestImperativeResneXt(unittest.TestCase): ...@@ -375,7 +375,7 @@ class TestImperativeResneXt(unittest.TestCase):
dy_param_value = {} dy_param_value = {}
for param in se_resnext.parameters(): for param in se_resnext.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard from paddle.fluid import Embedding, LayerNorm, FC, Layer
from paddle.fluid.dygraph import to_variable, guard
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid import core from paddle.fluid import core
import numpy as np import numpy as np
...@@ -116,7 +117,7 @@ class ModelHyperParams(object): ...@@ -116,7 +117,7 @@ class ModelHyperParams(object):
# to process after each sub-layer # to process after each sub-layer
postprocess_cmd = "da" # dropout + residual connection postprocess_cmd = "da" # dropout + residual connection
# random seed used in dropout for CE. # random seed used in dropout for CE.
dropout_seed = 1 dropout_seed = None
# the flag indicating whether to share embedding and softmax weights. # the flag indicating whether to share embedding and softmax weights.
# vocabularies in source and target should be same for weight sharing. # vocabularies in source and target should be same for weight sharing.
weight_sharing = True weight_sharing = True
...@@ -166,15 +167,21 @@ def create_data(is_static=False): ...@@ -166,15 +167,21 @@ def create_data(is_static=False):
] ]
else: else:
enc_inputs = [ enc_inputs = [
to_variable(src_word_np), to_variable(src_pos_np), to_variable(
to_variable(src_slf_attn_bias_np) src_word_np, name='src_word'), to_variable(
src_pos_np, name='src_pos'), to_variable(
src_slf_attn_bias_np, name='src_slf_attn_bias')
] ]
dec_inputs = [ dec_inputs = [
to_variable(trg_word_np), to_variable(trg_pos_np), to_variable(
to_variable(trg_slf_attn_bias_np), to_variable(trg_src_attn_bias_np) trg_word_np, name='trg_word'), to_variable(
trg_pos_np, name='trg_pos'), to_variable(
trg_slf_attn_bias_np, name='trg_slf_attn_bias'),
to_variable(
trg_src_attn_bias_np, name='trg_src_attn_bias')
] ]
label = to_variable(lbl_word_np) label = to_variable(lbl_word_np, name='lbl_word')
weight = to_variable(lbl_weight_np) weight = to_variable(lbl_weight_np, name='lbl_weight')
return enc_inputs, dec_inputs, label, weight return enc_inputs, dec_inputs, label, weight
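The hunk above threads explicit names through to_variable so the dygraph inputs can be matched against their static-graph counterparts. A small sketch of the named form, using the name keyword shown above:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

with fluid.dygraph.guard():
    arr = np.ones([2, 3], dtype='float32')
    v = to_variable(arr, name='src_word')
    print(v.name, v.numpy().shape)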
...@@ -211,7 +218,7 @@ def make_all_inputs(input_fields): ...@@ -211,7 +218,7 @@ def make_all_inputs(input_fields):
# The placeholder for batch_size in compile time. Must be -1 currently to be # The placeholder for batch_size in compile time. Must be -1 currently to be
# consistent with some ops' infer-shape output in compile time, such as the # consistent with some ops' infer-shape output in compile time, such as the
# sequence_expand op used in beamsearch decoder. # sequence_expand op used in beamsearch decoder.
batch_size = 32 batch_size = -1
# The placeholder for sequence length in compile time. # The placeholder for sequence length in compile time.
seq_len = ModelHyperParams.max_length seq_len = ModelHyperParams.max_length
# Here list the data shapes and data types of all inputs. # Here list the data shapes and data types of all inputs.
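A short sketch of the compile-time placeholder convention the comment above describes: the batch dimension is declared as -1 and only becomes concrete when data is fed (the names here are illustrative):

import paddle.fluid as fluid

seq_len = 4  # stands in for ModelHyperParams.max_length
src_word = fluid.layers.data(
    name='src_word', shape=[-1, seq_len, 1], dtype='int64',
    append_batch_size=False)
print(src_word.shape)  # (-1, 4, 1)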
...@@ -305,54 +312,40 @@ sync = False ...@@ -305,54 +312,40 @@ sync = False
# how many batches we use # how many batches we use
batch_num = 5 batch_num = 5
np.random.seed(1) np.random.seed(90)
src_word_np = np.random.randint( src_word_np = np.random.randint(
1, 1,
ModelHyperParams.src_vocab_size - 1, ModelHyperParams.src_vocab_size - 1,
size=(batch_size, seq_len, 1), size=(TrainTaskConfig.batch_size, seq_len, 1),
dtype='int64') dtype='int64')
src_pos_np = np.random.randint( src_pos_np = np.random.randint(
1, seq_len, size=(batch_size, seq_len, 1), dtype='int64') 1, seq_len, size=(TrainTaskConfig.batch_size, seq_len, 1), dtype='int64')
src_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head, src_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size,
seq_len, seq_len).astype('float32') ModelHyperParams.n_head, seq_len,
seq_len).astype('float32')
trg_word_np = np.random.randint( trg_word_np = np.random.randint(
1, 1,
ModelHyperParams.src_vocab_size - 1, ModelHyperParams.src_vocab_size - 1,
size=(batch_size, seq_len, 1), size=(TrainTaskConfig.batch_size, seq_len, 1),
dtype='int64') dtype='int64')
trg_pos_np = np.random.randint( trg_pos_np = np.random.randint(
1, seq_len, size=(batch_size, seq_len, 1), dtype='int64') 1, seq_len, size=(TrainTaskConfig.batch_size, seq_len, 1), dtype='int64')
trg_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head, trg_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size,
seq_len, seq_len).astype('float32') ModelHyperParams.n_head, seq_len,
trg_src_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head, seq_len).astype('float32')
seq_len, seq_len).astype('float32') trg_src_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size,
ModelHyperParams.n_head, seq_len,
seq_len).astype('float32')
lbl_word_np = np.random.randint( lbl_word_np = np.random.randint(
1, 1,
ModelHyperParams.src_vocab_size - 1, ModelHyperParams.src_vocab_size - 1,
size=(batch_size * seq_len, 1), size=(TrainTaskConfig.batch_size * seq_len, 1),
dtype='int64') dtype='int64')
lbl_weight_np = np.random.randn(batch_size * seq_len, 1).astype('float32') lbl_weight_np = np.random.randn(TrainTaskConfig.batch_size * seq_len,
1).astype('float32')
# np.random.seed = 1
# src_word_np = np.arange(0, 10).reshape([batch_size, seq_len, 1]).astype('int64')
# src_pos_np = np.random.randint(
# 1, seq_len, size=(batch_size, seq_len, 1), dtype='int64')
# src_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
# seq_len, seq_len).astype('float32')
#
# trg_word_np = np.arange(0, 10).reshape([batch_size, seq_len, 1]).astype('int64')
# trg_pos_np = np.random.randint(
# 1, seq_len, size=(batch_size, seq_len, 1), dtype='int64')
# trg_slf_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
# seq_len, seq_len).astype('float32')
# trg_src_attn_bias_np = np.random.randn(batch_size, ModelHyperParams.n_head,
# seq_len, seq_len).astype('float32')
#
# lbl_word_np = np.arange(0, 10).reshape([batch_size * seq_len, 1]).astype('int64')
# lbl_weight_np = np.random.randn(batch_size * seq_len, 1).astype('float32')
#
pos_inp1 = position_encoding_init(ModelHyperParams.max_length, pos_inp1 = position_encoding_init(ModelHyperParams.max_length,
ModelHyperParams.d_model) ModelHyperParams.d_model)
pos_inp2 = position_encoding_init(ModelHyperParams.max_length, pos_inp2 = position_encoding_init(ModelHyperParams.max_length,
...@@ -466,7 +459,7 @@ class MultiHeadAttentionLayer(Layer): ...@@ -466,7 +459,7 @@ class MultiHeadAttentionLayer(Layer):
x=v, shape=[0, 0, self._n_head, self._d_value], inplace=False) x=v, shape=[0, 0, self._n_head, self._d_value], inplace=False)
transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3])
#scale dot product attention # scale dot product attention
product = fluid.layers.matmul( product = fluid.layers.matmul(
x=transpose_q, x=transpose_q,
y=transpose_k, y=transpose_k,
...@@ -739,7 +732,7 @@ class DecoderSubLayer(Layer): ...@@ -739,7 +732,7 @@ class DecoderSubLayer(Layer):
enc_attn_output_pp = self._multihead_attention_layer2( enc_attn_output_pp = self._multihead_attention_layer2(
pre_process_rlt2, enc_output, enc_output, dec_enc_attn_bias) pre_process_rlt2, enc_output, enc_output, dec_enc_attn_bias)
enc_attn_output = self._post_process_layer2( enc_attn_output = self._post_process_layer2(
slf_attn_output, enc_attn_output_pp, self._postprocess_cmd, slf_attn_output_pp, enc_attn_output_pp, self._postprocess_cmd,
self._prepostprcess_dropout) self._prepostprcess_dropout)
pre_process_rlt3 = self._pre_process_layer3(None, enc_attn_output, pre_process_rlt3 = self._pre_process_layer3(None, enc_attn_output,
self._preprocess_cmd, self._preprocess_cmd,
...@@ -990,16 +983,18 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -990,16 +983,18 @@ class TestDygraphTransformer(unittest.TestCase):
enc_inputs, dec_inputs, label, weights = create_data() enc_inputs, dec_inputs, label, weights = create_data()
dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer( dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
enc_inputs, dec_inputs, label, weights) enc_inputs, dec_inputs, label, weights)
if i == 0: if i == 0:
for param in transformer.parameters(): for param in transformer.parameters():
dy_param_init[param.name] = param._numpy() dy_param_init[param.name] = param.numpy()
dy_avg_cost._backward() dy_avg_cost.backward()
optimizer.minimize(dy_avg_cost) optimizer.minimize(dy_avg_cost)
transformer.clear_gradients() transformer.clear_gradients()
if i == batch_num - 1: if i == batch_num - 1:
for param in transformer.parameters(): for param in transformer.parameters():
dy_param_updated[param.name] = param._numpy() dy_param_updated[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
...@@ -1043,7 +1038,6 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -1043,7 +1038,6 @@ class TestDygraphTransformer(unittest.TestCase):
static_param_name_list = list() static_param_name_list = list()
static_sum_cost, static_avg_cost, static_predict, static_token_num = transformer( static_sum_cost, static_avg_cost, static_predict, static_token_num = transformer(
enc_inputs, dec_inputs, label, weights) enc_inputs, dec_inputs, label, weights)
optimizer.minimize(static_avg_cost) optimizer.minimize(static_avg_cost)
for param in transformer.parameters(): for param in transformer.parameters():
static_param_name_list.append(param.name) static_param_name_list.append(param.name)
...@@ -1061,8 +1055,8 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -1061,8 +1055,8 @@ class TestDygraphTransformer(unittest.TestCase):
static_sum_cost, static_avg_cost, static_predict, static_sum_cost, static_avg_cost, static_predict,
static_token_num static_token_num
] ]
fetch_list.extend(static_param_name_list)
fetch_list.extend(static_param_name_list)
out = exe.run(fluid.default_main_program(), out = exe.run(fluid.default_main_program(),
feed=feed_dict, feed=feed_dict,
fetch_list=fetch_list) fetch_list=fetch_list)
...@@ -1076,13 +1070,14 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -1076,13 +1070,14 @@ class TestDygraphTransformer(unittest.TestCase):
4]] = out[k] 4]] = out[k]
self.assertTrue( self.assertTrue(
np.array_equal(static_avg_cost_value, dy_avg_cost._numpy())) np.array_equal(static_avg_cost_value, dy_avg_cost.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_sum_cost_value, dy_sum_cost._numpy())) np.array_equal(static_sum_cost_value, dy_sum_cost.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_predict_value, dy_predict._numpy())) np.array_equal(static_predict_value, dy_predict.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_token_num_value, dy_token_num._numpy())) np.array_equal(static_token_num_value, dy_token_num.numpy()))
for key, value in six.iteritems(static_param_init): for key, value in six.iteritems(static_param_init):
self.assertTrue(np.array_equal(value, dy_param_init[key])) self.assertTrue(np.array_equal(value, dy_param_init[key]))
for key, value in six.iteritems(static_param_updated): for key, value in six.iteritems(static_param_updated):
......
...@@ -114,7 +114,7 @@ class TestLayer(LayerTest): ...@@ -114,7 +114,7 @@ class TestLayer(LayerTest):
dy_ret = fc2(ret) dy_ret = fc2(ret)
self.assertTrue(np.array_equal(static_ret, static_ret2)) self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(static_ret, dy_ret._numpy())) self.assertTrue(np.array_equal(static_ret, dy_ret.numpy()))
def test_layer_norm(self): def test_layer_norm(self):
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 32, 32], dtype='float32')
...@@ -142,7 +142,7 @@ class TestLayer(LayerTest): ...@@ -142,7 +142,7 @@ class TestLayer(LayerTest):
dy_ret = lm(base.to_variable(inp)) dy_ret = lm(base.to_variable(inp))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(dy_ret._numpy(), static_ret2)) self.assertTrue(np.allclose(dy_ret.numpy(), static_ret2))
def test_relu(self): def test_relu(self):
with self.static_graph(): with self.static_graph():
...@@ -156,7 +156,7 @@ class TestLayer(LayerTest): ...@@ -156,7 +156,7 @@ class TestLayer(LayerTest):
t = np.ones([3, 3], dtype='float32') t = np.ones([3, 3], dtype='float32')
dy_ret = layers.relu(base.to_variable(t)) dy_ret = layers.relu(base.to_variable(t))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_matmul(self): def test_matmul(self):
with self.static_graph(): with self.static_graph():
...@@ -177,7 +177,7 @@ class TestLayer(LayerTest): ...@@ -177,7 +177,7 @@ class TestLayer(LayerTest):
t2 = np.ones([3, 3], dtype='float32') t2 = np.ones([3, 3], dtype='float32')
dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2)) dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv2d(self): def test_conv2d(self):
with self.static_graph(): with self.static_graph():
...@@ -204,7 +204,7 @@ class TestLayer(LayerTest): ...@@ -204,7 +204,7 @@ class TestLayer(LayerTest):
'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2]) 'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2])
dy_ret = conv2d(base.to_variable(images)) dy_ret = conv2d(base.to_variable(images))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_gru_unit(self): def test_gru_unit(self):
...@@ -246,7 +246,7 @@ class TestLayer(LayerTest): ...@@ -246,7 +246,7 @@ class TestLayer(LayerTest):
for i in range(len(static_ret)): for i in range(len(static_ret)):
self.assertTrue(np.allclose(static_ret[i], static_ret2[i])) self.assertTrue(np.allclose(static_ret[i], static_ret2[i]))
self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy())) self.assertTrue(np.allclose(static_ret[i], dy_ret[i].numpy()))
def test_elementwise_math(self): def test_elementwise_math(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -288,8 +288,8 @@ class TestLayer(LayerTest): ...@@ -288,8 +288,8 @@ class TestLayer(LayerTest):
ret = layers.elementwise_sub(ret, n5) ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6) dy_ret = layers.elementwise_mul(ret, n6)
self.assertTrue( self.assertTrue(
np.allclose(static_ret, dy_ret._numpy()), np.allclose(static_ret, dy_ret.numpy()),
'%s vs %s' % (static_ret, dy_ret._numpy())) '%s vs %s' % (static_ret, dy_ret.numpy()))
def test_elementwise_minmax(self): def test_elementwise_minmax(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -299,8 +299,8 @@ class TestLayer(LayerTest): ...@@ -299,8 +299,8 @@ class TestLayer(LayerTest):
min_ret = layers.elementwise_min(n, n2) min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2) max_ret = layers.elementwise_max(n, n2)
self.assertTrue(np.allclose(n, min_ret._numpy())) self.assertTrue(np.allclose(n, min_ret.numpy()))
self.assertTrue(np.allclose(n2, max_ret._numpy())) self.assertTrue(np.allclose(n2, max_ret.numpy()))
def test_sequence_conv(self): def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32') inp_np = np.arange(12).reshape([3, 4]).astype('float32')
...@@ -367,7 +367,7 @@ class TestLayer(LayerTest): ...@@ -367,7 +367,7 @@ class TestLayer(LayerTest):
'conv2d_transpose', num_filters=10, output_size=28) 'conv2d_transpose', num_filters=10, output_size=28)
dy_rlt = conv2d_transpose(base.to_variable(inp_np)) dy_rlt = conv2d_transpose(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_bilinear_tensor_product(self): def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32') inp_np_x = np.array([[1, 2, 3]]).astype('float32')
...@@ -410,7 +410,7 @@ class TestLayer(LayerTest): ...@@ -410,7 +410,7 @@ class TestLayer(LayerTest):
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_prelu(self): def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32') inp_np = np.ones([5, 200, 100, 100]).astype('float32')
...@@ -451,7 +451,7 @@ class TestLayer(LayerTest): ...@@ -451,7 +451,7 @@ class TestLayer(LayerTest):
dy_rlt = prelu(base.to_variable(inp_np)) dy_rlt = prelu(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_embeding(self): def test_embeding(self):
inp_word = np.array([[[1]]]).astype('int64') inp_word = np.array([[[1]]]).astype('int64')
...@@ -484,7 +484,7 @@ class TestLayer(LayerTest): ...@@ -484,7 +484,7 @@ class TestLayer(LayerTest):
static_rlt3 = emb2(base.to_variable(inp_word)) static_rlt3 = emb2(base.to_variable(inp_word))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt)) self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt))
def test_nce(self): def test_nce(self):
window_size = 5 window_size = 5
...@@ -598,7 +598,7 @@ class TestLayer(LayerTest): ...@@ -598,7 +598,7 @@ class TestLayer(LayerTest):
nce_loss3 = nce(embs3, words[label_word]) nce_loss3 = nce(embs3, words[label_word])
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt))
def test_conv3d(self): def test_conv3d(self):
with self.static_graph(): with self.static_graph():
...@@ -625,7 +625,7 @@ class TestLayer(LayerTest): ...@@ -625,7 +625,7 @@ class TestLayer(LayerTest):
conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
dy_ret = conv3d(base.to_variable(images)) dy_ret = conv3d(base.to_variable(images))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_row_conv(self): def test_row_conv(self):
...@@ -719,7 +719,7 @@ class TestLayer(LayerTest): ...@@ -719,7 +719,7 @@ class TestLayer(LayerTest):
groupNorm = nn.GroupNorm('GroupNorm', groups=2) groupNorm = nn.GroupNorm('GroupNorm', groups=2)
dy_ret = groupNorm(base.to_variable(input)) dy_ret = groupNorm(base.to_variable(input))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_spectral_norm(self): def test_spectral_norm(self):
...@@ -769,7 +769,7 @@ class TestLayer(LayerTest): ...@@ -769,7 +769,7 @@ class TestLayer(LayerTest):
spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
dy_ret = spectralNorm(base.to_variable(input)) dy_ret = spectralNorm(base.to_variable(input))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_tree_conv(self): def test_tree_conv(self):
...@@ -842,7 +842,7 @@ class TestLayer(LayerTest): ...@@ -842,7 +842,7 @@ class TestLayer(LayerTest):
dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv3d_transpose(self): def test_conv3d_transpose(self):
input_array = np.arange(0, 48).reshape( input_array = np.arange(0, 48).reshape(
...@@ -872,7 +872,7 @@ class TestLayer(LayerTest): ...@@ -872,7 +872,7 @@ class TestLayer(LayerTest):
use_cudnn=False) use_cudnn=False)
dy_rlt = conv3d_transpose(base.to_variable(input_array)) dy_rlt = conv3d_transpose(base.to_variable(input_array))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
class TestBook(LayerTest): class TestBook(LayerTest):
...@@ -907,7 +907,7 @@ class TestBook(LayerTest): ...@@ -907,7 +907,7 @@ class TestBook(LayerTest):
if isinstance(dy_result, tuple): if isinstance(dy_result, tuple):
dy_result = dy_result[0] dy_result = dy_result[0]
self.assertTrue(np.array_equal(static_result[0], dy_result._numpy())) self.assertTrue(np.array_equal(static_result[0], dy_result.numpy()))
def _get_np_data(self, shape, dtype, append_batch_size=True): def _get_np_data(self, shape, dtype, append_batch_size=True):
np.random.seed(self.seed) np.random.seed(self.seed)
...@@ -1925,6 +1925,13 @@ class TestBook(LayerTest): ...@@ -1925,6 +1925,13 @@ class TestBook(LayerTest):
out = layers.flatten(x, axis=1, name="flatten") out = layers.flatten(x, axis=1, name="flatten")
return (out) return (out)
def test_linspace(self):
program = Program()
with program_guard(program):
out = layers.linspace(20, 10, 5, 'float64')
self.assertIsNotNone(out)
print(str(program))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
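Every test in this file follows one pattern: run a layer once in a static program, once eagerly under a dygraph guard, and compare the results through the public numpy() accessor. A condensed sketch of that comparison for relu, without the LayerTest helpers:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

inp = np.ones([3, 3], dtype='float32')

# Static graph: build the program, then run it under an executor.
main = fluid.Program()
with fluid.program_guard(main):
    t = fluid.layers.data(name='t', shape=[3, 3], append_batch_size=False)
    static_out = fluid.layers.relu(t)
exe = fluid.Executor(fluid.CPUPlace())
static_ret = exe.run(main, feed={'t': inp}, fetch_list=[static_out])[0]

# Dygraph: the same op, executed eagerly.
with fluid.dygraph.guard():
    dy_ret = fluid.layers.relu(to_variable(inp))

assert np.allclose(static_ret, dy_ret.numpy())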
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
class TestLinspaceOpCommonCase(OpTest):
def setUp(self):
self.op_type = "linspace"
dtype = 'float32'
self.inputs = {
'Start': np.array([0]).astype(dtype),
'Stop': np.array([10]).astype(dtype),
'Num': np.array([11]).astype('int32')
}
self.outputs = {'Out': np.arange(0, 11).astype(dtype)}
def test_check_output(self):
self.check_output()
class TestLinspaceOpReverseCase(OpTest):
def setUp(self):
self.op_type = "linspace"
dtype = 'float32'
self.inputs = {
'Start': np.array([10]).astype(dtype),
'Stop': np.array([0]).astype(dtype),
'Num': np.array([11]).astype('int32')
}
self.outputs = {'Out': np.arange(10, -1, -1).astype(dtype)}
def test_check_output(self):
self.check_output()
class TestLinspaceOpNumOneCase(OpTest):
def setUp(self):
self.op_type = "linspace"
dtype = 'float32'
self.inputs = {
'Start': np.array([10]).astype(dtype),
'Stop': np.array([0]).astype(dtype),
'Num': np.array([1]).astype('int32')
}
self.outputs = {'Out': np.array(10, dtype=dtype)}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
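For reference, the three cases above match NumPy's linspace semantics exactly; a quick mirror of the expected outputs:

import numpy as np

assert np.allclose(np.linspace(0, 10, 11, dtype='float32'),
                   np.arange(0, 11).astype('float32'))       # common case
assert np.allclose(np.linspace(10, 0, 11, dtype='float32'),
                   np.arange(10, -1, -1).astype('float32'))  # reverse case
assert np.allclose(np.linspace(10, 0, 1, dtype='float32'),
                   np.array([10], dtype='float32'))          # num == 1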
...@@ -73,7 +73,14 @@ class TestNearestInterpOp(OpTest): ...@@ -73,7 +73,14 @@ class TestNearestInterpOp(OpTest):
self.op_type = "nearest_interp" self.op_type = "nearest_interp"
input_np = np.random.random(self.input_shape).astype("float32") input_np = np.random.random(self.input_shape).astype("float32")
output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w, if self.scale > 0:
out_h = int(self.input_shape[2] * self.scale)
out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = nearest_neighbor_interp_np(input_np, out_h, out_w,
self.out_size, self.actual_shape, self.out_size, self.actual_shape,
self.align_corners) self.align_corners)
self.inputs = {'X': input_np} self.inputs = {'X': input_np}
...@@ -84,6 +91,7 @@ class TestNearestInterpOp(OpTest): ...@@ -84,6 +91,7 @@ class TestNearestInterpOp(OpTest):
self.attrs = { self.attrs = {
'out_h': self.out_h, 'out_h': self.out_h,
'out_w': self.out_w, 'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method, 'interp_method': self.interp_method,
'align_corners': self.align_corners, 'align_corners': self.align_corners,
} }
...@@ -100,6 +108,7 @@ class TestNearestInterpOp(OpTest): ...@@ -100,6 +108,7 @@ class TestNearestInterpOp(OpTest):
self.input_shape = [2, 3, 4, 4] self.input_shape = [2, 3, 4, 4]
self.out_h = 2 self.out_h = 2
self.out_w = 2 self.out_w = 2
self.scale = 0.
self.out_size = np.array([3, 3]).astype("int32") self.out_size = np.array([3, 3]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -110,6 +119,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): ...@@ -110,6 +119,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 1 self.out_h = 1
self.out_w = 1 self.out_w = 1
self.scale = 0.
self.align_corners = True self.align_corners = True
...@@ -119,6 +129,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): ...@@ -119,6 +129,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp):
self.input_shape = [3, 3, 9, 6] self.input_shape = [3, 3, 9, 6]
self.out_h = 12 self.out_h = 12
self.out_w = 12 self.out_w = 12
self.scale = 0.
self.align_corners = True self.align_corners = True
...@@ -128,6 +139,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): ...@@ -128,6 +139,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp):
self.input_shape = [1, 1, 128, 64] self.input_shape = [1, 1, 128, 64]
self.out_h = 64 self.out_h = 64
self.out_w = 128 self.out_w = 128
self.scale = 0.
self.align_corners = True self.align_corners = True
...@@ -137,6 +149,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): ...@@ -137,6 +149,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 1 self.out_h = 1
self.out_w = 1 self.out_w = 1
self.scale = 0.
self.out_size = np.array([2, 2]).astype("int32") self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -147,6 +160,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): ...@@ -147,6 +160,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp):
self.input_shape = [3, 3, 9, 6] self.input_shape = [3, 3, 9, 6]
self.out_h = 12 self.out_h = 12
self.out_w = 12 self.out_w = 12
self.scale = 0.
self.out_size = np.array([11, 11]).astype("int32") self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -157,6 +171,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): ...@@ -157,6 +171,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp):
self.input_shape = [1, 1, 128, 64] self.input_shape = [1, 1, 128, 64]
self.out_h = 64 self.out_h = 64
self.out_w = 128 self.out_w = 128
self.scale = 0.
self.out_size = np.array([65, 129]).astype("int32") self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -167,6 +182,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): ...@@ -167,6 +182,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
self.input_shape = [3, 2, 32, 16] self.input_shape = [3, 2, 32, 16]
self.out_h = 64 self.out_h = 64
self.out_w = 32 self.out_w = 32
self.scale = 0.
self.out_size = np.array([66, 40]).astype("int32") self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -179,7 +195,15 @@ class TestNearestInterpOpUint8(OpTest): ...@@ -179,7 +195,15 @@ class TestNearestInterpOpUint8(OpTest):
self.op_type = "nearest_interp" self.op_type = "nearest_interp"
input_np = np.random.randint( input_np = np.random.randint(
low=0, high=256, size=self.input_shape).astype("uint8") low=0, high=256, size=self.input_shape).astype("uint8")
output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
if self.scale > 0:
out_h = int(self.input_shape[2] * self.scale)
out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = nearest_neighbor_interp_np(input_np, out_h, out_w,
self.out_size, self.actual_shape, self.out_size, self.actual_shape,
self.align_corners) self.align_corners)
self.inputs = {'X': input_np} self.inputs = {'X': input_np}
...@@ -188,6 +212,7 @@ class TestNearestInterpOpUint8(OpTest): ...@@ -188,6 +212,7 @@ class TestNearestInterpOpUint8(OpTest):
self.attrs = { self.attrs = {
'out_h': self.out_h, 'out_h': self.out_h,
'out_w': self.out_w, 'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method, 'interp_method': self.interp_method,
'align_corners': self.align_corners 'align_corners': self.align_corners
} }
...@@ -201,6 +226,7 @@ class TestNearestInterpOpUint8(OpTest): ...@@ -201,6 +226,7 @@ class TestNearestInterpOpUint8(OpTest):
self.input_shape = [1, 3, 9, 6] self.input_shape = [1, 3, 9, 6]
self.out_h = 10 self.out_h = 10
self.out_w = 9 self.out_w = 9
self.scale = 0.
self.align_corners = True self.align_corners = True
...@@ -210,6 +236,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): ...@@ -210,6 +236,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
self.input_shape = [2, 3, 128, 64] self.input_shape = [2, 3, 128, 64]
self.out_h = 120 self.out_h = 120
self.out_w = 50 self.out_w = 50
self.scale = 0.
self.align_corners = True self.align_corners = True
...@@ -219,6 +246,7 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): ...@@ -219,6 +246,7 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
self.input_shape = [4, 1, 7, 8] self.input_shape = [4, 1, 7, 8]
self.out_h = 5 self.out_h = 5
self.out_w = 13 self.out_w = 13
self.scale = 0.
self.out_size = np.array([6, 15]).astype("int32") self.out_size = np.array([6, 15]).astype("int32")
self.align_corners = True self.align_corners = True
...@@ -228,5 +256,38 @@ class TestNearestInterpWithoutCorners(TestNearestInterpOp): ...@@ -228,5 +256,38 @@ class TestNearestInterpWithoutCorners(TestNearestInterpOp):
self.align_corners = False self.align_corners = False
class TestNearestNeighborInterpScale1(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 2.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpScale2(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 1.5
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpScale3(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 1.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
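The scale attribute these cases add takes precedence over out_h/out_w whenever it is positive, exactly as in the setUp logic above. A tiny stand-alone mirror of that resolution (resolved_output_size is illustrative, not part of the op):

def resolved_output_size(input_shape, out_h, out_w, scale):
    # input_shape is NCHW; a positive scale overrides the explicit sizes.
    if scale > 0:
        return int(input_shape[2] * scale), int(input_shape[3] * scale)
    return out_h, out_w

print(resolved_output_size([3, 2, 32, 16], 64, 32, 2.))   # (64, 32)
print(resolved_output_size([3, 2, 32, 16], 64, 32, 1.5))  # (48, 24)
print(resolved_output_size([3, 2, 32, 16], 64, 32, 0.))   # (64, 32)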
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -29,7 +29,7 @@ import unittest ...@@ -29,7 +29,7 @@ import unittest
import math import math
import numpy as np import numpy as np
from functools import partial from functools import partial
os.environ['CPU_NUM'] = str(4)
# FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor # FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor
# and Executor is different. Because, for ParallelExecutor, the dropout_op of # and Executor is different. Because, for ParallelExecutor, the dropout_op of
# the neural net will be copied N times (N is the number of devices). This will # the neural net will be copied N times (N is the number of devices). This will
...@@ -113,7 +113,6 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): ...@@ -113,7 +113,6 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
return fluid.layers.elementwise_add(x=short, y=scale, act='relu') return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
batch_size = 12
img_shape = [3, 224, 224] img_shape = [3, 224, 224]
...@@ -181,43 +180,84 @@ def optimizer(learning_rate=0.01): ...@@ -181,43 +180,84 @@ def optimizer(learning_rate=0.01):
return optimizer return optimizer
def _batch_size():
return 12
def _iter(use_cuda):
if use_cuda:
return 10
return 2
gpu_img, gpu_label = init_data(
batch_size=_batch_size(), img_shape=img_shape, label_range=999)
cpu_img, cpu_label = init_data(
batch_size=_batch_size(), img_shape=img_shape, label_range=999)
feed_dict_gpu = {"image": gpu_img, "label": gpu_label}
feed_dict_cpu = {"image": cpu_img, "label": cpu_label}
model = SE_ResNeXt50Small
def _feed_dict(use_cuda):
if use_cuda:
return feed_dict_gpu
return feed_dict_cpu
def _get_result_of_origin_model(use_cuda):
global remove_bn
global remove_dropout
remove_bn = True
remove_dropout = True
first_loss, last_loss = TestParallelExecutorBase.check_network_convergence(
model,
feed_dict=_feed_dict(use_cuda),
iter=_iter(use_cuda),
batch_size=_batch_size(),
use_cuda=use_cuda,
use_reduce=False,
optimizer=optimizer)
return first_loss, last_loss
origin_cpu_first_loss, origin_cpu_last_loss = _get_result_of_origin_model(False)
if core.is_compiled_with_cuda():
origin_gpu_first_loss, origin_gpu_last_loss = _get_result_of_origin_model(
True)
def _get_origin_result(use_cuda):
if use_cuda:
assert core.is_compiled_with_cuda(), "Not compiled with CUDA."
return origin_gpu_first_loss, origin_gpu_last_loss
return origin_cpu_first_loss, origin_cpu_last_loss
class TestResnet(TestParallelExecutorBase): class TestResnet(TestParallelExecutorBase):
@classmethod def _compare_reduce_and_allreduce(self, use_cuda, delta2=1e-5):
def setUpClass(cls):
os.environ['CPU_NUM'] = str(4)
global remove_dropout
global remove_bn
remove_dropout = False
remove_bn = False
def _compare_reduce_and_allreduce(self,
model,
use_cuda,
iter=20,
delta2=1e-5):
if use_cuda and not core.is_compiled_with_cuda(): if use_cuda and not core.is_compiled_with_cuda():
return return
global remove_bn global remove_bn
global remove_dropout
remove_bn = True remove_bn = True
remove_dropout = True
img, label = init_data(
batch_size=batch_size, img_shape=img_shape, label_range=999)
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
model, model,
feed_dict={"image": img, feed_dict=_feed_dict(use_cuda),
"label": label}, iter=_iter(use_cuda),
iter=iter, batch_size=_batch_size(),
batch_size=batch_size,
use_cuda=use_cuda, use_cuda=use_cuda,
use_reduce=False, use_reduce=False,
optimizer=optimizer) optimizer=optimizer)
reduce_first_loss, reduce_last_loss = self.check_network_convergence( reduce_first_loss, reduce_last_loss = self.check_network_convergence(
model, model,
feed_dict={"image": img, feed_dict=_feed_dict(use_cuda),
"label": label}, iter=_iter(use_cuda),
iter=iter, batch_size=_batch_size(),
batch_size=batch_size,
use_cuda=use_cuda, use_cuda=use_cuda,
use_reduce=True, use_reduce=True,
optimizer=optimizer) optimizer=optimizer)
...@@ -232,10 +272,9 @@ class TestResnet(TestParallelExecutorBase): ...@@ -232,10 +272,9 @@ class TestResnet(TestParallelExecutorBase):
all_reduce_first_loss_seq, all_reduce_last_loss_seq = self.check_network_convergence( all_reduce_first_loss_seq, all_reduce_last_loss_seq = self.check_network_convergence(
model, model,
feed_dict={"image": img, feed_dict=_feed_dict(use_cuda),
"label": label}, iter=_iter(use_cuda),
iter=iter, batch_size=_batch_size(),
batch_size=batch_size,
use_cuda=use_cuda, use_cuda=use_cuda,
use_reduce=False, use_reduce=False,
optimizer=optimizer, optimizer=optimizer,
...@@ -243,10 +282,9 @@ class TestResnet(TestParallelExecutorBase): ...@@ -243,10 +282,9 @@ class TestResnet(TestParallelExecutorBase):
reduce_first_loss_seq, reduce_last_loss_seq = self.check_network_convergence( reduce_first_loss_seq, reduce_last_loss_seq = self.check_network_convergence(
model, model,
feed_dict={"image": img, feed_dict=_feed_dict(use_cuda),
"label": label}, iter=_iter(use_cuda),
iter=iter, batch_size=_batch_size(),
batch_size=batch_size,
use_cuda=use_cuda, use_cuda=use_cuda,
use_reduce=True, use_reduce=True,
optimizer=optimizer, optimizer=optimizer,
...@@ -267,37 +305,28 @@ class TestResnet(TestParallelExecutorBase): ...@@ -267,37 +305,28 @@ class TestResnet(TestParallelExecutorBase):
for loss in zip(all_reduce_last_loss_seq, reduce_last_loss_seq): for loss in zip(all_reduce_last_loss_seq, reduce_last_loss_seq):
self.assertAlmostEquals(loss[0], loss[1], delta=delta2) self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
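use_reduce switches between the two gradient-aggregation strategies ParallelExecutor supports: Reduce shards gradient aggregation across devices, while AllReduce replicates it on every device. The wiring lives in TestParallelExecutorBase and is not shown in this diff, but it presumably maps onto a BuildStrategy along these lines:

    import paddle.fluid as fluid

    def build_strategy_for(use_reduce):
        # Sketch under the assumption that use_reduce is forwarded to
        # BuildStrategy.reduce_strategy; the real plumbing is in the base class.
        strategy = fluid.BuildStrategy()
        strategy.reduce_strategy = (
            fluid.BuildStrategy.ReduceStrategy.Reduce
            if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce)
        return strategy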
def _check_resnet_convergence(self, def _compare_result_with_origin_model(self,
model, get_origin_result,
check_func_1, check_func_2,
check_func_2, use_cuda,
use_cuda, delta2=1e-5,
iter=20, compare_seperately=True,
delta2=1e-5, rm_drop_out=False,
compare_seperately=True): rm_bn=False):
if use_cuda and not core.is_compiled_with_cuda(): if use_cuda and not core.is_compiled_with_cuda():
return return
global remove_dropout
global remove_bn global remove_bn
remove_dropout = True global remove_dropout
remove_bn = True remove_bn = rm_bn or use_cuda
remove_dropout = rm_drop_out
img, label = init_data( func_1_first_loss, func_1_last_loss = get_origin_result(use_cuda)
batch_size=batch_size, img_shape=img_shape, label_range=999)
func_1_first_loss, func_1_last_loss = check_func_1(
model,
feed_dict={"image": img,
"label": label},
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda)
func_2_first_loss, func_2_last_loss = check_func_2( func_2_first_loss, func_2_last_loss = check_func_2(
model, model,
feed_dict={"image": img, feed_dict=_feed_dict(use_cuda),
"label": label}, iter=_iter(use_cuda),
iter=iter, batch_size=_batch_size(),
batch_size=batch_size,
use_cuda=use_cuda) use_cuda=use_cuda)
if compare_seperately: if compare_seperately:
...@@ -311,97 +340,55 @@ class TestResnet(TestParallelExecutorBase): ...@@ -311,97 +340,55 @@ class TestResnet(TestParallelExecutorBase):
self.assertAlmostEquals( self.assertAlmostEquals(
np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2) np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2)
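compare_seperately chooses between the two comparison modes at the end of this method: pairwise comparison of per-device losses, or comparing the mean of one run's losses against the other run's single value. In plain numpy terms:

    import numpy as np

    def losses_match(losses_a, losses_b, delta, separately):
        # separately=True: compare per-device losses pairwise.
        if separately:
            return all(abs(a - b) <= delta for a, b in zip(losses_a, losses_b))
        # separately=False: compare the mean of one run against the
        # single loss value of the other.
        return abs(np.mean(losses_a) - losses_b[0]) <= delta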
def _compare_with_fused_all_reduce(self,
model,
use_cuda,
iter=20,
delta2=1e-5):
if use_cuda and not core.is_compiled_with_cuda():
return
global remove_bn
remove_bn = True
img, label = init_data(
batch_size=batch_size, img_shape=img_shape, label_range=999)
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
model,
feed_dict={"image": img,
"label": label},
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda,
fuse_all_reduce_ops=False,
optimizer=optimizer)
reduce_first_loss, reduce_last_loss = self.check_network_convergence(
model,
feed_dict={"image": img,
"label": label},
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda,
fuse_all_reduce_ops=True,
optimizer=optimizer)
for loss in zip(all_reduce_first_loss, reduce_first_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=1e-5)
for loss in zip(all_reduce_last_loss, reduce_last_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
def test_seresnext_with_reduce(self): def test_seresnext_with_reduce(self):
self._compare_reduce_and_allreduce( self._compare_reduce_and_allreduce(use_cuda=False, delta2=1e-3)
model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-2) self._compare_reduce_and_allreduce(use_cuda=True, delta2=1e-2)
self._compare_reduce_and_allreduce(
model=SE_ResNeXt50Small, use_cuda=False, iter=5)
def test_seresnext_with_fused_all_reduce(self):
self._compare_with_fused_all_reduce(
model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-3)
self._compare_with_fused_all_reduce(
model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3)
def test_seresnext_with_learning_rate_decay(self): def test_seresnext_with_learning_rate_decay(self):
check_func_1 = partial( # NOTE(zcd): This test compares the results of ParallelExecutor and Executor,
self.check_network_convergence, # and the drop_out and batch_norm ops produce different results in these two
optimizer=optimizer, # executors, so both ops should be removed from the model.
use_parallel_executor=True) check_func_1 = _get_origin_result
check_func_2 = partial( check_func_2 = partial(
self.check_network_convergence, self.check_network_convergence,
optimizer=optimizer, optimizer=optimizer,
use_parallel_executor=False) use_parallel_executor=False)
self._check_resnet_convergence( self._compare_result_with_origin_model(
SE_ResNeXt50Small,
check_func_1,
check_func_2,
use_cuda=True,
compare_seperately=False)
self._check_resnet_convergence(
SE_ResNeXt50Small,
check_func_1, check_func_1,
check_func_2, check_func_2,
use_cuda=False, use_cuda=False,
rm_drop_out=True,
rm_bn=True,
compare_seperately=False, compare_seperately=False,
iter=2,
delta2=1e-3) delta2=1e-3)
self._compare_result_with_origin_model(
check_func_1,
check_func_2,
use_cuda=True,
rm_drop_out=True,
rm_bn=True,
compare_seperately=False)
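The NOTE above is the crux of this test: each device draws its own dropout mask, so an N-device ParallelExecutor run and a single-device Executor run cannot produce identical losses while dropout (or the batch-statistics-dependent batch_norm) is in the model. A toy numpy illustration of the divergence:

    import numpy as np

    x = np.ones((4, 8), dtype=np.float32)
    rng_dev0 = np.random.RandomState(0)  # each device keeps its own RNG state
    rng_dev1 = np.random.RandomState(1)

    mask0 = rng_dev0.uniform(size=x.shape) > 0.5
    mask1 = rng_dev1.uniform(size=x.shape) > 0.5

    # The devices keep different subsets of activations, so any loss computed
    # downstream of dropout differs between them.
    print((x * mask0).sum(), (x * mask1).sum())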
def test_seresnext_with_fused_optimizer_ops(self): def test_seresnext_with_fused_all_reduce(self):
check_func_1 = partial( # NOTE(zcd): In order to make the program faster,
self.check_network_convergence, fuse_all_optimizer_ops=False) # this unit test removes drop_out and batch_norm.
check_func_1 = _get_origin_result
check_func_2 = partial( check_func_2 = partial(
self.check_network_convergence, fuse_all_optimizer_ops=True) self.check_network_convergence,
# TODO(zcd): this test fails randomly; I will fix it in the next PR. optimizer=optimizer,
# self._check_resnet_convergence( fuse_all_reduce_ops=True)
# SE_ResNeXt50Small, self._compare_result_with_origin_model(
# check_func_1,
# check_func_2,
# use_cuda=True,
# delta2=1e-3)
self._check_resnet_convergence(
SE_ResNeXt50Small,
check_func_1, check_func_1,
check_func_2, check_func_2,
use_cuda=False, use_cuda=False,
iter=2, rm_drop_out=True,
rm_bn=True)
self._compare_result_with_origin_model(
check_func_1,
check_func_2,
use_cuda=True,
rm_drop_out=True,
rm_bn=True,
delta2=1e-3) delta2=1e-3)
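fuse_all_reduce_ops asks the build pass to coalesce many small gradient all-reduces into fewer large ones; the test asserts that fusion changes only performance, not the converged losses. Assuming the flag is forwarded to BuildStrategy (the wiring is in the base class, not this diff), enabling it looks roughly like:

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = True  # coalesce gradient all-reduces
    # Typical use (loss is a hypothetical fluid Variable from the model):
    # compiled = fluid.CompiledProgram(fluid.default_main_program()) \
    #     .with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)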
......
...@@ -91,6 +91,78 @@ class TestProdOp(OpTest): ...@@ -91,6 +91,78 @@ class TestProdOp(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
class TestAllOp(OpTest):
def setUp(self):
self.op_type = "reduce_all"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.outputs = {'Out': self.inputs['X'].all()}
self.attrs = {'reduce_all': True}
def test_check_output(self):
self.check_output()
class TestAllOpWithDim(OpTest):
def setUp(self):
self.op_type = "reduce_all"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.attrs = {'dim': [1]}
self.outputs = {'Out': self.inputs['X'].all(axis=1)}
def test_check_output(self):
self.check_output()
class TestAllOpWithKeepDim(OpTest):
def setUp(self):
self.op_type = "reduce_all"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.attrs = {'dim': [1], 'keep_dim': True}
self.outputs = {
'Out': np.expand_dims(
self.inputs['X'].all(axis=1), axis=1)
}
def test_check_output(self):
self.check_output()
class TestAnyOp(OpTest):
def setUp(self):
self.op_type = "reduce_any"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.outputs = {'Out': self.inputs['X'].any()}
self.attrs = {'reduce_all': True}
def test_check_output(self):
self.check_output()
class TestAnyOpWithDim(OpTest):
def setUp(self):
self.op_type = "reduce_any"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.attrs = {'dim': [1]}
self.outputs = {'Out': self.inputs['X'].any(axis=1)}
def test_check_output(self):
self.check_output()
class TestAnyOpWithKeepDim(OpTest):
def setUp(self):
self.op_type = "reduce_any"
self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")}
self.attrs = {'dim': [1], 'keep_dim': True}
self.outputs = {
'Out': np.expand_dims(
self.inputs['X'].any(axis=1), axis=1)
}
def test_check_output(self):
self.check_output()
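The expected outputs in the reduce_all/reduce_any tests above are just numpy's all/any with the op attributes mapped as dim -> axis and keep_dim -> np.expand_dims. Spelled out as a runnable sketch:

    import numpy as np

    x = np.random.randint(0, 2, (5, 6, 10)).astype("bool")

    # reduce_all=True collapses every dimension to a single bool.
    assert x.all() == np.all(x)

    # dim=[1] reduces along axis 1; keep_dim=True retains it as a size-1 axis.
    out = x.any(axis=1)                     # shape (5, 10)
    out_keep = np.expand_dims(out, axis=1)  # shape (5, 1, 10)
    assert out_keep.shape == (5, 1, 10)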
class Test1DReduce(OpTest): class Test1DReduce(OpTest):
def setUp(self): def setUp(self):
self.op_type = "reduce_sum" self.op_type = "reduce_sum"
......