From e4aea428c722d6094e18ef2e9f414221ebd91dd2 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Fri, 11 Oct 2019 11:35:40 +0800
Subject: [PATCH] New save load interface (#20148) (#20384)

* add new save load interface; test=develop
* add new save interface; test=develop
* add save load interface;
* fix save load error;
* fix dygraph set dict bug;
* add save load unit test; test=develop
* fix test_imperative_optimizer bug; test=develop
* fix unittest optimizer bug; test=develop
* fix code coverage; test=develop
* fix coverage; test=develop
* add document for apis; test=develop
* fix unittest error; test=develop
* fix save load unit test error; test=develop
* fix error message; test=develop
* change set_parameter set_optimizer to save_dygraph; test=develop
* add load_graph check; test=develop
* fix api spec; test=develop
---
 paddle/fluid/API.spec | 149 +--
 paddle/fluid/framework/CMakeLists.txt | 3 +
 paddle/fluid/framework/save_load_util.cc | 374 ++++++++
 paddle/fluid/framework/save_load_util.h | 54 ++
 paddle/fluid/framework/save_load_util_test.cc | 78 ++
 paddle/fluid/pybind/CMakeLists.txt | 2 +-
 paddle/fluid/pybind/protobuf.cc | 1 +
 paddle/fluid/pybind/pybind.cc | 123 ++-
 python/paddle/fluid/__init__.py | 4 +
 python/paddle/fluid/core.py | 8 +
 python/paddle/fluid/dygraph/checkpoint.py | 312 +++---
 python/paddle/fluid/dygraph/layers.py | 104 +-
 python/paddle/fluid/framework.py | 42 +-
 python/paddle/fluid/io.py | 102 +-
 python/paddle/fluid/layers/nn.py | 6 +-
 python/paddle/fluid/optimizer.py | 197 ++--
 .../unittests/test_imperative_checkpoint.py | 172 ----
 .../unittests/test_imperative_save_load.py | 903 ++++++++++++++++++
 .../test_imperative_save_load_optimizer.py | 196 ----
 .../fluid/tests/unittests/test_layers.py | 2 +-
 .../tests/unittests/test_static_save_load.py | 431 +++++++++
 21 files changed, 2535 insertions(+), 728 deletions(-)
 create mode 100644 paddle/fluid/framework/save_load_util.cc
 create mode 100644 paddle/fluid/framework/save_load_util.h
 create mode 100644 paddle/fluid/framework/save_load_util_test.cc
 delete mode 100644 python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
 create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_save_load.py
 delete mode 100644 python/paddle/fluid/tests/unittests/test_imperative_save_load_optimizer.py
 create mode 100644 python/paddle/fluid/tests/unittests/test_static_save_load.py

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 9eb71dc8536..378d446c216 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -17,7 +17,7 @@ paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, ke
 paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'df1f4d1ed7e1eefe04f6361efda6b75a'))
 paddle.fluid.is_compiled_with_cuda (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '60c7f107a5050aeb58bb74eb175672b5'))
 paddle.fluid.Variable ('paddle.fluid.framework.Variable', ('document', '8e51f2adcad7ae33aa35a178cceebdf6'))
-paddle.fluid.Variable.__init__ (ArgSpec(args=['self', 'block', 'type', 'name', 'shape', 'dtype', 'lod_level', 'capacity', 'persistable', 'error_clip', 'stop_gradient', 'is_data', 'need_check_feed'], varargs=None, keywords='kwargs', defaults=(VarType.LOD_TENSOR, None, None, None, None, None, None, None, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.Variable.__init__ (ArgSpec(args=['self', 'block', 'type', 'name', 'shape', 'dtype',
'lod_level', 'capacity', 'persistable', 'error_clip', 'stop_gradient', 'is_data', 'need_check_feed', 'belong_to_optimizer'], varargs=None, keywords='kwargs', defaults=(VarType.LOD_TENSOR, None, None, None, None, None, None, None, False, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Variable.astype (ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', 'ffc7049afe250829b56986c40cf3cca3')) paddle.fluid.Variable.backward (ArgSpec(args=['self', 'backward_strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a2fabca0102442aae4d6537028574d07')) paddle.fluid.Variable.clear_gradient (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'c6e511dcccf51a95eeb83447322b4345')) @@ -79,6 +79,8 @@ paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment', 'program_only'], varargs=None, keywords=None, defaults=(None, None, None, True, False)), ('document', 'fc82bfd137a9b1ab8ebd1651bd35b6e5')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a863032bf7613dec1c8dd99efbd82e5')) paddle.fluid.io.batch (ArgSpec(args=['reader', 'batch_size', 'drop_last'], varargs=None, keywords=None, defaults=(False,)), ('document', 'cf2869b408b39cadadd95206b4e03b39')) +paddle.fluid.io.save (ArgSpec(args=['program', 'model_path'], varargs=None, keywords=None, defaults=None), ('document', 'cef7d50c36b93c02b6d12bcea7d025ce')) +paddle.fluid.io.load (ArgSpec(args=['program', 'model_path'], varargs=None, keywords=None, defaults=None), ('document', '8d0f200c20f8a4581e1843967230ad45')) paddle.fluid.io.PyReader ('paddle.fluid.reader.PyReader', ('document', 'b03399246f69cd6fc03b43e87af8bd4e')) paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable', 'return_list'], varargs=None, keywords=None, defaults=(None, None, True, True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '4364e836e3cb8ab5e68e411b763c50c7')) @@ -538,9 +540,10 @@ paddle.fluid.contrib.BasicGRUUnit.create_variable (ArgSpec(args=['self', 'name', paddle.fluid.contrib.BasicGRUUnit.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicGRUUnit.forward (ArgSpec(args=['self', 'input', 'pre_hidden'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicGRUUnit.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.contrib.BasicGRUUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.BasicGRUUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) 
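The two new static-graph entry points registered above, paddle.fluid.io.save and paddle.fluid.io.load, both take (program, model_path) per their ArgSpec. A minimal sketch of the intended round trip, assuming a toy program; the layer, variable names, and checkpoint prefix are illustrative, and the exact on-disk file suffixes are not specified by this spec:

    import paddle.fluid as fluid

    # Build a toy static-graph program with one persistable FC parameter.
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.layers.data(name='x', shape=[8], dtype='float32')
        hidden = fluid.layers.fc(input=x, size=4)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)  # initialize parameters before saving

    # ArgSpec above: save(program, model_path) / load(program, model_path).
    fluid.io.save(main_prog, "./saved_prog")   # persist the program's variables
    fluid.io.load(main_prog, "./saved_prog")   # restore them into main_prog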
paddle.fluid.contrib.BasicGRUUnit.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.contrib.BasicGRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.BasicGRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.contrib.BasicGRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.contrib.BasicGRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.contrib.BasicGRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.basic_gru (ArgSpec(args=['input', 'init_hidden', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 'float32', 'basic_gru')), ('document', '0afcbe4fbe1b8c35eda58b4efe48f9fd')) @@ -555,9 +558,10 @@ paddle.fluid.contrib.BasicLSTMUnit.create_variable (ArgSpec(args=['self', 'name' paddle.fluid.contrib.BasicLSTMUnit.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicLSTMUnit.forward (ArgSpec(args=['self', 'input', 'pre_hidden', 'pre_cell'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicLSTMUnit.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.contrib.BasicLSTMUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.BasicLSTMUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.contrib.BasicLSTMUnit.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.contrib.BasicLSTMUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.BasicLSTMUnit.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.contrib.BasicLSTMUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.contrib.BasicLSTMUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', 
'00a881005ecbc96578faf94513bf0d62')) paddle.fluid.contrib.BasicLSTMUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', 'fe4d0c3c55a162b8cfe10b05fabb7ce4')) @@ -574,9 +578,10 @@ paddle.fluid.dygraph.Layer.create_variable (ArgSpec(args=['self', 'name', 'persi paddle.fluid.dygraph.Layer.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Layer.forward (ArgSpec(args=['self'], varargs='inputs', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Layer.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Layer.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Layer.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Layer.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Layer.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Layer.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Layer.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Layer.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Layer.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.__impl__ (ArgSpec(args=['func'], varargs=None, keywords=None, defaults=()), ('document', '75d1d3afccc8b39cdebf05cb1f5969f9')) @@ -593,9 +598,10 @@ paddle.fluid.dygraph.Conv2D.create_variable (ArgSpec(args=['self', 'name', 'pers paddle.fluid.dygraph.Conv2D.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2D.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2D.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Conv2D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, 
keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv2D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Conv2D.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Conv2D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv2D.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Conv2D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Conv2D.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Conv2D.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3D ('paddle.fluid.dygraph.nn.Conv3D', ('document', '50412bd3fbf3557a8ef48e25c6517025')) @@ -609,9 +615,10 @@ paddle.fluid.dygraph.Conv3D.create_variable (ArgSpec(args=['self', 'name', 'pers paddle.fluid.dygraph.Conv3D.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3D.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3D.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Conv3D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv3D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Conv3D.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Conv3D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv3D.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Conv3D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Conv3D.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Conv3D.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 
'6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Pool2D ('paddle.fluid.dygraph.nn.Pool2D', ('document', '50e6fd200e42859daf2924ecb0561ada')) @@ -625,9 +632,10 @@ paddle.fluid.dygraph.Pool2D.create_variable (ArgSpec(args=['self', 'name', 'pers paddle.fluid.dygraph.Pool2D.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Pool2D.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Pool2D.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Pool2D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Pool2D.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Pool2D.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Pool2D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Pool2D.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Pool2D.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Pool2D.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Pool2D.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.FC ('paddle.fluid.dygraph.nn.FC', ('document', '2f73ae00e57c67454c6aa7e911d9bfd6')) @@ -641,9 +649,10 @@ paddle.fluid.dygraph.FC.create_variable (ArgSpec(args=['self', 'name', 'persista paddle.fluid.dygraph.FC.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.FC.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.FC.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.FC.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.FC.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.FC.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.FC.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], 
varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.FC.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.FC.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.FC.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.FC.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BatchNorm ('paddle.fluid.dygraph.nn.BatchNorm', ('document', '390fb9b986423ec6680731ffc7cf24ab')) @@ -657,9 +666,10 @@ paddle.fluid.dygraph.BatchNorm.create_variable (ArgSpec(args=['self', 'name', 'p paddle.fluid.dygraph.BatchNorm.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BatchNorm.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BatchNorm.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.BatchNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.BatchNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.BatchNorm.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.BatchNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.BatchNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.BatchNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.BatchNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.BatchNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Embedding ('paddle.fluid.dygraph.nn.Embedding', ('document', 'b1b1ed9dc2125c3e16ee08113605fcb4')) @@ -673,9 +683,10 @@ paddle.fluid.dygraph.Embedding.create_variable (ArgSpec(args=['self', 'name', 'p paddle.fluid.dygraph.Embedding.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Embedding.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), 
('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Embedding.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Embedding.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Embedding.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Embedding.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Embedding.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Embedding.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Embedding.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Embedding.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Embedding.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GRUUnit ('paddle.fluid.dygraph.nn.GRUUnit', ('document', '389e860e455b67aab1f4d472ac9d7e49')) @@ -689,9 +700,10 @@ paddle.fluid.dygraph.GRUUnit.create_variable (ArgSpec(args=['self', 'name', 'per paddle.fluid.dygraph.GRUUnit.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GRUUnit.forward (ArgSpec(args=['self', 'input', 'hidden'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GRUUnit.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.GRUUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.GRUUnit.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.GRUUnit.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.GRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.GRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.GRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, 
defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.GRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.GRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.LayerNorm ('paddle.fluid.dygraph.nn.LayerNorm', ('document', '8bc39f59fe2d3713bc143fdf1222a63b')) @@ -705,9 +717,10 @@ paddle.fluid.dygraph.LayerNorm.create_variable (ArgSpec(args=['self', 'name', 'p paddle.fluid.dygraph.LayerNorm.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.LayerNorm.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.LayerNorm.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.LayerNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.LayerNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.LayerNorm.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.LayerNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.LayerNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.LayerNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.LayerNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.LayerNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NCE ('paddle.fluid.dygraph.nn.NCE', ('document', '993aeea9be436e9c709a758795cb23e9')) @@ -721,9 +734,10 @@ paddle.fluid.dygraph.NCE.create_variable (ArgSpec(args=['self', 'name', 'persist paddle.fluid.dygraph.NCE.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NCE.forward (ArgSpec(args=['self', 'input', 'label', 'sample_weight'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NCE.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.NCE.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) 
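Across every dygraph layer in this spec the same pattern repeats: load_dict gets a real docstring, set_dict is introduced alongside it, and state_dict is re-documented. A minimal sketch of the round trip these methods support, assuming dygraph mode and 1.6-era layer names (FC and the checkpoint prefix are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # FC creates its parameters lazily, so run one forward pass first.
        fc = fluid.dygraph.FC("fc", 4)
        fc(fluid.dygraph.to_variable(np.ones([2, 8], dtype='float32')))

        state = fc.state_dict()                # parameter name -> tensor
        fluid.dygraph.save_dygraph(state, "./fc_ckpt")

        # load_dygraph returns (parameter dict, optimizer dict); only the
        # first half matters for a bare layer.
        para_state, _ = fluid.dygraph.load_dygraph("./fc_ckpt")
        fc.set_dict(para_state)                # the new name for load_dict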
+paddle.fluid.dygraph.NCE.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.NCE.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.NCE.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.NCE.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.NCE.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.NCE.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.NCE.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.PRelu ('paddle.fluid.dygraph.nn.PRelu', ('document', 'da956af1676b08bf15553751a3643b55')) @@ -737,9 +751,10 @@ paddle.fluid.dygraph.PRelu.create_variable (ArgSpec(args=['self', 'name', 'persi paddle.fluid.dygraph.PRelu.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.PRelu.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.PRelu.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.PRelu.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.PRelu.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.PRelu.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.PRelu.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.PRelu.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.PRelu.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.PRelu.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.PRelu.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BilinearTensorProduct 
('paddle.fluid.dygraph.nn.BilinearTensorProduct', ('document', 'be70d0f6d43729d9cb80c9a34ed5f26b')) @@ -753,9 +768,10 @@ paddle.fluid.dygraph.BilinearTensorProduct.create_variable (ArgSpec(args=['self' paddle.fluid.dygraph.BilinearTensorProduct.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BilinearTensorProduct.forward (ArgSpec(args=['self', 'x', 'y'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.BilinearTensorProduct.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.BilinearTensorProduct.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.BilinearTensorProduct.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.BilinearTensorProduct.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.BilinearTensorProduct.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.BilinearTensorProduct.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.BilinearTensorProduct.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.BilinearTensorProduct.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.BilinearTensorProduct.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2DTranspose ('paddle.fluid.dygraph.nn.Conv2DTranspose', ('document', 'cf23c905abc00b07603dfa71a432d6f7')) @@ -769,9 +785,10 @@ paddle.fluid.dygraph.Conv2DTranspose.create_variable (ArgSpec(args=['self', 'nam paddle.fluid.dygraph.Conv2DTranspose.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2DTranspose.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2DTranspose.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Conv2DTranspose.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv2DTranspose.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Conv2DTranspose.parameters 
(ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Conv2DTranspose.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv2DTranspose.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Conv2DTranspose.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Conv2DTranspose.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Conv2DTranspose.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3DTranspose ('paddle.fluid.dygraph.nn.Conv3DTranspose', ('document', '91ba132bc690eaf76eabdbde8f87e4a0')) @@ -785,9 +802,10 @@ paddle.fluid.dygraph.Conv3DTranspose.create_variable (ArgSpec(args=['self', 'nam paddle.fluid.dygraph.Conv3DTranspose.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3DTranspose.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv3DTranspose.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.Conv3DTranspose.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv3DTranspose.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.Conv3DTranspose.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.Conv3DTranspose.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.Conv3DTranspose.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.Conv3DTranspose.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.Conv3DTranspose.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Conv3DTranspose.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GroupNorm ('paddle.fluid.dygraph.nn.GroupNorm', ('document', '72c125b07bdd1e612607dc77039b2722')) @@ 
-801,9 +819,10 @@ paddle.fluid.dygraph.GroupNorm.create_variable (ArgSpec(args=['self', 'name', 'p paddle.fluid.dygraph.GroupNorm.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GroupNorm.forward (ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GroupNorm.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.GroupNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.GroupNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.GroupNorm.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.GroupNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.GroupNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.GroupNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.GroupNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.GroupNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.SpectralNorm ('paddle.fluid.dygraph.nn.SpectralNorm', ('document', '8f5cfbc431a8b4b44b605cde8b0381ef')) @@ -817,9 +836,10 @@ paddle.fluid.dygraph.SpectralNorm.create_variable (ArgSpec(args=['self', 'name', paddle.fluid.dygraph.SpectralNorm.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.SpectralNorm.forward (ArgSpec(args=['self', 'weight'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.SpectralNorm.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.SpectralNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.SpectralNorm.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.SpectralNorm.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.SpectralNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, 
defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.SpectralNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.SpectralNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.SpectralNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.SpectralNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.TreeConv ('paddle.fluid.dygraph.nn.TreeConv', ('document', '6e175a7bf2a43ae6c0f3a8a54bd69afe')) @@ -833,9 +853,10 @@ paddle.fluid.dygraph.TreeConv.create_variable (ArgSpec(args=['self', 'name', 'pe paddle.fluid.dygraph.TreeConv.eval (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.TreeConv.forward (ArgSpec(args=['self', 'nodes_vector', 'edge_set'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.TreeConv.full_name (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '23ce4f961f48ed0f79cadf93a3938ed2')) -paddle.fluid.dygraph.TreeConv.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.TreeConv.load_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '0d90496acb61c3884865c6712c3cfd82')) paddle.fluid.dygraph.TreeConv.parameters (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '5aec25a854eb57abc798dccccbb507d5')) -paddle.fluid.dygraph.TreeConv.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.dygraph.TreeConv.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '579561dd9ae133ceb8a7ee18c2dd7232')) +paddle.fluid.dygraph.TreeConv.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac')) paddle.fluid.dygraph.TreeConv.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.TreeConv.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Tracer ('paddle.fluid.dygraph.tracer.Tracer', ('document', '28d72409112111274c33e1f07229d5da')) @@ -847,8 +868,8 @@ paddle.fluid.dygraph.Tracer.trace_op (ArgSpec(args=['self', 'type', 'inputs', 'o paddle.fluid.dygraph.Tracer.trace_var (ArgSpec(args=['self', 'name', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Tracer.train_mode (ArgSpec(args=['self'], varargs=None, keywords=None, 
defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.dygraph.prepare_context (ArgSpec(args=['strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.dygraph.save_persistables (ArgSpec(args=['model_dict', 'dirname', 'optimizers'], varargs=None, keywords=None, defaults=('save_dir', None)), ('document', 'b0b2ec2a502214a737300fb648cb9dc7'))
-paddle.fluid.dygraph.load_persistables (ArgSpec(args=['dirname'], varargs=None, keywords=None, defaults=('save_dir',)), ('document', 'e0709f8259620fdcfd2c0c1b23348852'))
+paddle.fluid.dygraph.save_dygraph (ArgSpec(args=['state_dict', 'model_path'], varargs=None, keywords=None, defaults=None), ('document', '7c2bd58a69f9bca3b884f44154c84569'))
+paddle.fluid.dygraph.load_dygraph (ArgSpec(args=['model_path'], varargs=None, keywords=None, defaults=None), ('document', 'd6d98002c39d2484835f4748e35b761c'))
 paddle.fluid.dygraph.NoamDecay ('paddle.fluid.dygraph.learning_rate_scheduler.NoamDecay', ('document', '9ccfea97dbf15134d406a23aae1e1fa2'))
 paddle.fluid.dygraph.NoamDecay.__init__ (ArgSpec(args=['self', 'd_model', 'warmup_steps', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(1, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.dygraph.NoamDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866'))
@@ -909,80 +930,90 @@ paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'para
 paddle.fluid.optimizer.SGDOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4'))
 paddle.fluid.optimizer.SGDOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.optimizer.SGDOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
+paddle.fluid.optimizer.SGDOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17'))
+paddle.fluid.optimizer.SGDOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8'))
 paddle.fluid.optimizer.MomentumOptimizer ('paddle.fluid.optimizer.MomentumOptimizer', ('document', 'a72bd02e5459e64596897d190413d449'))
 paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
 paddle.fluid.optimizer.MomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4'))
 paddle.fluid.optimizer.MomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.optimizer.MomentumOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
+paddle.fluid.optimizer.MomentumOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17'))
+paddle.fluid.optimizer.MomentumOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8'))
 paddle.fluid.optimizer.AdagradOptimizer ('paddle.fluid.optimizer.AdagradOptimizer', ('document', 'b6508a25326275d44e658dd73bcd5593'))
 paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
 paddle.fluid.optimizer.AdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
 paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4'))
 paddle.fluid.optimizer.AdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.optimizer.AdagradOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
+paddle.fluid.optimizer.AdagradOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17'))
+paddle.fluid.optimizer.AdagradOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8'))
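On the optimizer side the patch is symmetric: each optimizer loses the old load(stat_dict) method and gains the same state_dict()/set_dict() pair as the layers, so optimizer state can ride through save_dygraph/load_dygraph as well. A hedged sketch of a full dygraph checkpoint round trip; the model, prefix, and one-step training loop are assumptions for illustration, not taken from the patch:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        fc = fluid.dygraph.FC("fc", 4)
        adam = fluid.optimizer.AdamOptimizer(learning_rate=0.01)

        # One training step so the optimizer has accumulators to checkpoint.
        loss = fluid.layers.reduce_mean(
            fc(fluid.dygraph.to_variable(np.ones([2, 8], dtype='float32'))))
        loss.backward()
        adam.minimize(loss)

        fluid.dygraph.save_dygraph(fc.state_dict(), "./ckpt")    # parameters
        fluid.dygraph.save_dygraph(adam.state_dict(), "./ckpt")  # optimizer state

        para_state, opti_state = fluid.dygraph.load_dygraph("./ckpt")
        fc.set_dict(para_state)
        adam.set_dict(opti_state)   # replaces the removed Optimizer.load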
paddle.fluid.optimizer.AdamOptimizer ('paddle.fluid.optimizer.AdamOptimizer', ('document', '34e694895a702ba18a7b5ae618458217')) paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.AdamOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.AdamOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.AdamOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.AdamOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.AdamOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.AdamaxOptimizer ('paddle.fluid.optimizer.AdamaxOptimizer', ('document', '515bae60aa82e7fbd1046f59e56549bb')) paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.AdamaxOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.AdamaxOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.AdamaxOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 
'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.AdamaxOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.AdamaxOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.DpsgdOptimizer ('paddle.fluid.optimizer.DpsgdOptimizer', ('document', '71113c30b66c0f4035b10ebd8af8c5ad')) paddle.fluid.optimizer.DpsgdOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'clip', 'batch_size', 'sigma'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DpsgdOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.DpsgdOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.DpsgdOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.DpsgdOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.DpsgdOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.DpsgdOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.DpsgdOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.DpsgdOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.DecayedAdagradOptimizer ('paddle.fluid.optimizer.DecayedAdagradOptimizer', ('document', '6f5adb9f881a3b182236e344033dbd44')) paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, 
None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.DecayedAdagradOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.DecayedAdagradOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.FtrlOptimizer ('paddle.fluid.optimizer.FtrlOptimizer', ('document', 'cba8aae0a267b9a4d8833ae79a00fc55')) paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.FtrlOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.FtrlOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.FtrlOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.FtrlOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.FtrlOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.RMSPropOptimizer ('paddle.fluid.optimizer.RMSPropOptimizer', ('document', '5217bc4fc399010021d6b70541005780')) paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) 
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.RMSPropOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.RMSPropOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.RMSPropOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', '6710aad9700421ac6ccb4088b6b7f250')) paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.AdadeltaOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.AdadeltaOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.AdadeltaOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) 
+paddle.fluid.optimizer.AdadeltaOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.ModelAverage ('paddle.fluid.optimizer.ModelAverage', ('document', 'e039a4b422ce5b360b4d777481d64975')) paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '582c279ec4792edf2d95a3064578da7b')) @@ -990,33 +1021,37 @@ paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'para paddle.fluid.optimizer.ModelAverage.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.ModelAverage.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.ModelAverage.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '7917cbe4d3ed7954ae73360fbccc39f6')) +paddle.fluid.optimizer.ModelAverage.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.ModelAverage.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.LarsMomentumOptimizer ('paddle.fluid.optimizer.LarsMomentumOptimizer', ('document', '030b9092a96a409b1bf5446bf45d0659')) paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.LarsMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 
'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.LarsMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.LarsMomentumOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.LarsMomentumOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.LarsMomentumOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.DGCMomentumOptimizer ('paddle.fluid.optimizer.DGCMomentumOptimizer', ('document', 'facdbef1b4871d0cf74c736ff2e94720')) paddle.fluid.optimizer.DGCMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'rampup_begin_step', 'rampup_step', 'sparsity', 'use_nesterov', 'local_grad_clip_norm', 'num_trainers', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1, [0.999], False, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.DGCMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.DGCMomentumOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.DGCMomentumOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.DGCMomentumOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.DGCMomentumOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.LambOptimizer ('paddle.fluid.optimizer.LambOptimizer', ('document', '56b5b21dc8fb01174c3bdd0b24f8be4b')) paddle.fluid.optimizer.LambOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'lamb_weight_decay', 'beta1', 'beta2', 'epsilon', 'regularization', 'exclude_from_weight_decay_fn', 'name'], 
varargs=None, keywords=None, defaults=(0.001, 0.01, 0.9, 0.999, 1e-06, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.LambOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) paddle.fluid.optimizer.LambOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae')) paddle.fluid.optimizer.LambOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'd2a59fb4c678a2feb231fc5b1adcc9b4')) paddle.fluid.optimizer.LambOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.optimizer.LambOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde')) paddle.fluid.optimizer.LambOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304')) +paddle.fluid.optimizer.LambOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.LambOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.optimizer.ExponentialMovingAverage ('paddle.fluid.optimizer.ExponentialMovingAverage', ('document', 'c93a5e29890877d94c3180b3d5f2d464')) paddle.fluid.optimizer.ExponentialMovingAverage.__init__ (ArgSpec(args=['self', 'decay', 'thres_steps', 'name'], varargs=None, keywords=None, defaults=(0.999, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '178fa374c299906eb88c0df8ed21a337')) @@ -1036,6 +1071,8 @@ paddle.fluid.optimizer.RecomputeOptimizer.backward (ArgSpec(args=['self', 'loss' paddle.fluid.optimizer.RecomputeOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.RecomputeOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '7b2b8ae72011bc4decb67e97623f2c56')) paddle.fluid.optimizer.RecomputeOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.RecomputeOptimizer.set_dict (ArgSpec(args=['self', 'state_dict'], varargs=None, keywords=None, defaults=None), ('document', '36aa497a2d29abaa4147987d71721d17')) +paddle.fluid.optimizer.RecomputeOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks', 'checkpoints'], 
varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'c68fe1cb95d90762b57c309cae9b99d9')) paddle.fluid.backward.gradients (ArgSpec(args=['targets', 'inputs', 'target_gradients', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'e2097e1e0ed84ae44951437bfe269a1b')) paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRegularizer', ('document', '34603757e70974d2fcc730643b382925')) @@ -1093,3 +1130,5 @@ paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, k paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ead717d6d440a1eb11971695cd1727f4')) paddle.fluid.Scope Scope() -> paddle.fluid.core_avx._Scope paddle.fluid.install_check.run_check (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '66b7c84a17ed32fec2df9628367be2b9')) +paddle.fluid.save (ArgSpec(args=['program', 'model_path'], varargs=None, keywords=None, defaults=None), ('document', 'cef7d50c36b93c02b6d12bcea7d025ce')) +paddle.fluid.load (ArgSpec(args=['program', 'model_path'], varargs=None, keywords=None, defaults=None), ('document', '8d0f200c20f8a4581e1843967230ad45')) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index cbdb8e87e57..c7fa68c26ab 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -226,6 +226,9 @@ cc_test(dlpack_tensor_test SRCS dlpack_tensor_test.cc DEPS dlpack_tensor glog) cc_library(op_compatible_info SRCS op_compatible_info DEPS string_helper proto_desc) cc_test(op_compatible_info_test SRCS op_compatible_info_test.cc DEPS op_compatible_info proto_desc string_helper glog) +cc_library(save_load_util SRCS save_load_util DEPS tensor scope layer) +cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) + # Get the current working branch execute_process( COMMAND git rev-parse --abbrev-ref HEAD diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc new file mode 100644 index 00000000000..76544af052f --- /dev/null +++ b/paddle/fluid/framework/save_load_util.cc @@ -0,0 +1,374 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
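Note for reviewers: the implementation below writes one container file per state dict: a 256-byte reserved header, a 4-byte "TNUM" mark plus the tensor count, then per tensor a "NAME" mark, name length and name, a uint32 version (currently 0), an int32 protobuf TensorDesc size, the serialized TensorDesc, and the raw tensor bytes. A minimal reader sketch, assuming a little-endian build where `size_t` is 8 bytes; the dtype-width table only covers the common types:

.. code-block:: python

    import struct
    from functools import reduce
    from paddle.fluid.proto import framework_pb2

    # byte widths for common dtypes; extend as needed
    _DTYPE_SIZE = {
        framework_pb2.VarType.FP32: 4,
        framework_pb2.VarType.FP64: 8,
        framework_pb2.VarType.INT32: 4,
        framework_pb2.VarType.INT64: 8,
    }

    def read_tensor_names_and_shapes(path):
        """Walk the container written by SaveTensorToDisk."""
        with open(path, "rb") as f:
            f.read(256)                               # reserved header
            assert f.read(4) == b"TNUM"               # tensor-number mark
            (num,) = struct.unpack("<Q", f.read(8))   # size_t tensor count
            for _ in range(num):
                assert f.read(4) == b"NAME"           # tensor-name mark
                (name_len,) = struct.unpack("<Q", f.read(8))
                name = f.read(name_len).decode()
                (version,) = struct.unpack("<I", f.read(4))
                assert version == 0                   # only version 0 exists
                (desc_size,) = struct.unpack("<i", f.read(4))
                desc = framework_pb2.VarType.TensorDesc()
                desc.ParseFromString(f.read(desc_size))
                numel = reduce(lambda a, b: a * b, desc.dims, 1)
                f.read(numel * _DTYPE_SIZE[desc.data_type])  # skip raw data
                yield name, list(desc.dims)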
+
+#include "paddle/fluid/framework/save_load_util.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+
+#include "paddle/fluid/imperative/layer.h"
+
+namespace paddle {
+namespace framework {
+
+const int model_file_reserve_size = 256;
+const std::string tensor_number_mark = "TNUM";  // NOLINT
+const std::string tensor_name_mark = "NAME";    // NOLINT
+
+void CheckInStreamState(std::istream& istre, size_t length) {
+  if (!istre) {
+    VLOG(5) << "Can't read [" << length << "] from file, "
+            << "the file seems broken";
+
+    PADDLE_THROW("Model load error, the file seems broken");
+  }
+}
+
+struct DeserializedDataFunctor {
+  DeserializedDataFunctor(void** buf, Tensor* tensor,
+                          const platform::Place& place)
+      : buf_(buf), tensor_(tensor), place_(place) {}
+
+  template <typename T>
+  void apply() {
+    *buf_ = tensor_->mutable_data<T>(place_);
+  }
+
+  void** buf_;
+  Tensor* tensor_;
+  platform::Place place_;
+};
+
+size_t ReadTensorNumber(std::istream& istre) {
+  char* tensor_number_mark_buffer = new char[tensor_number_mark.size()];
+  istre.read(tensor_number_mark_buffer,
+             sizeof(char) * tensor_number_mark.size());
+  std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
+                                          tensor_number_mark.size());
+  PADDLE_ENFORCE_EQ(
+      tensor_number_mark, str_read_tensor_number_mark,
+      "Tensor number mark does not match, expect [%s], but read from file is [%s]",
+      tensor_number_mark, str_read_tensor_number_mark);
+
+  size_t tensor_number = 0;
+  istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
+
+  CheckInStreamState(istre, sizeof(tensor_number));
+
+  delete[] tensor_number_mark_buffer;
+  return tensor_number;
+}
+
+std::string ReadTensorName(std::istream& istre) {
+  char* name_mark_buffer = new char[tensor_name_mark.size()];
+  istre.read(name_mark_buffer, sizeof(char) * tensor_name_mark.size());
+  CheckInStreamState(istre, sizeof(char) * tensor_name_mark.size());
+
+  std::string str_read_tensor_name_mark(name_mark_buffer,
+                                        tensor_name_mark.size());
+  PADDLE_ENFORCE_EQ(
+      tensor_name_mark, str_read_tensor_name_mark,
+      "Tensor name mark does not match, expect [%s], but read from file is [%s]",
+      tensor_name_mark, str_read_tensor_name_mark);
+
+  size_t tensor_name_length = 0;
+  istre.read(reinterpret_cast<char*>(&tensor_name_length),
+             sizeof(tensor_name_length));
+
+  CheckInStreamState(istre, sizeof(tensor_name_length));
+
+  char* tensor_name_buffer = new char[tensor_name_length];
+  istre.read(tensor_name_buffer, sizeof(char) * tensor_name_length);
+  CheckInStreamState(istre, sizeof(char) * tensor_name_length);
+
+  std::string str_tensor_name(tensor_name_buffer, tensor_name_length);
+
+  delete[] name_mark_buffer;
+  delete[] tensor_name_buffer;
+
+  return str_tensor_name;
+}
+
+void ReadReserveBuffer(std::istream& istre) {
+  char* reserve_buffer = new char[model_file_reserve_size];
+  istre.read(reserve_buffer, sizeof(char) * model_file_reserve_size);
+  CheckInStreamState(istre, model_file_reserve_size);
+
+  delete[] reserve_buffer;
+}
+
+bool SaveStaticNameListToDisk(
+    const std::string& file_name,
+    const std::vector<std::string>& vec_tensor_name_list, const Scope& scope) {
+  std::map<std::string, Tensor*> map_tensor;
+
+  for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
+    auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
+    PADDLE_ENFORCE_NE(
+        var_ptr, nullptr,
+        "Variable find error: when saving the model, cannot find variable [%s]. "
+        "Please make sure you have run StartUpProgram",
+        vec_tensor_name_list[i]);
+    Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
+    PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
+                      "Parameter [%s] is not initialized, "
+                      "please make sure you have run StartUpProgram",
+                      vec_tensor_name_list[i]);
+
+    map_tensor[vec_tensor_name_list[i]] = tensor;
+  }
+
+  return SaveTensorToDisk(file_name, map_tensor);
+}
+
+bool SaveDygraphVarBaseListToDisk(
+    const std::string& file_name,
+    const std::vector<std::shared_ptr<imperative::VarBase>>&
+        vec_var_base_list) {
+  std::map<std::string, Tensor*> map_tensor;
+  for (size_t i = 0; i < vec_var_base_list.size(); ++i) {
+    auto var_ptr = vec_var_base_list[i]->MutableVar();
+
+    Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
+
+    PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
+                      "Parameter [%s] is not initialized, "
+                      "please make sure you have run StartUpProgram",
+                      vec_var_base_list[i]->Name());
+
+    map_tensor[vec_var_base_list[i]->Name()] = tensor;
+  }
+
+  return SaveTensorToDisk(file_name, map_tensor);
+}
+
+const std::vector<std::shared_ptr<imperative::VarBase>>
+LoadDygraphVarBaseListFromDisk(const std::string& file_name) {
+  std::map<std::string, std::shared_ptr<Tensor>> map_load_tensor;
+  LoadTensorFromDisk(file_name, &map_load_tensor);
+
+  std::vector<std::shared_ptr<imperative::VarBase>> vec_res;
+  vec_res.reserve(map_load_tensor.size());
+  for (auto& load_tensor : map_load_tensor) {
+    std::shared_ptr<imperative::VarBase> var(
+        new imperative::VarBase(load_tensor.first));
+
+    auto* tensor = var->MutableVar()->GetMutable<LoDTensor>();
+
+    TensorCopySync(*(load_tensor.second.get()), load_tensor.second->place(),
+                   tensor);
+
+    vec_res.emplace_back(var);
+  }
+
+  return vec_res;
+}
+
+bool LoadStaticNameListFromDisk(
+    const std::string& file_name,
+    const std::vector<std::string>& vec_tensor_name_list, const Scope& scope) {
+  std::map<std::string, std::shared_ptr<Tensor>> map_load_tensor;
+  LoadTensorFromDisk(file_name, &map_load_tensor);
+
+  for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
+    auto it = map_load_tensor.find(vec_tensor_name_list[i]);
+    PADDLE_ENFORCE(it != map_load_tensor.end(),
+                   "Parameter not found in model file: "
+                   "cannot find [%s] in model file [%s]",
+                   vec_tensor_name_list[i], file_name);
+    auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
+
+    PADDLE_ENFORCE_NE(
+        var_ptr, nullptr,
+        "Parameter not created: when loading the model, cannot find parameter [%s], "
+        "please make sure you have run StartUpProgram",
+        vec_tensor_name_list[i]);
+
+    Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
+    PADDLE_ENFORCE_NE(tensor, nullptr,
+                      "Parameter [%s] is not initialized, "
+                      "please make sure you have run StartUpProgram",
+                      vec_tensor_name_list[i]);
+
+    PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
+                      "Parameter [%s] is not initialized, "
+                      "please make sure you have run StartUpProgram",
+                      vec_tensor_name_list[i]);
+    PADDLE_ENFORCE_EQ(
+        tensor->dims(), it->second->dims(),
+        "Shape not matching: the Program requires a parameter with a shape of "
+        "(%s), "
+        "while the loaded parameter (namely [ %s ]) has a shape of (%s).",
+        tensor->dims(), vec_tensor_name_list[i], it->second->dims());
+
+    TensorCopySync(*(it->second.get()), tensor->place(), tensor);
+
+    map_load_tensor.erase(it);
+  }
+
+  if (map_load_tensor.size() > 0) {
+    std::string unused_tensor_message =
+        "There are [" + std::to_string(map_load_tensor.size()) +
+        "] tensors in the model file that were not used: ";
+
+    for (auto& tensor_temp : map_load_tensor) {
+      unused_tensor_message += " " + tensor_temp.first;
+    }
+
+    LOG(ERROR) << unused_tensor_message;
+  }
+
+  return true;
+}
+
+bool SaveTensorToDisk(const std::string& file_name,
+                      const std::map<std::string, Tensor*>& map_tensor) {
+  MkDirRecursively(DirName(file_name).c_str());
+
+  std::ofstream fout(file_name, std::ios::binary);
+  if (!fout) {
+    PADDLE_THROW("File open error. Can not open file [%s]", file_name);
+  }
+
+  // the first 256 bytes are reserved for future upgrades (zero-filled)
+  char* kReserveBuffer = new char[model_file_reserve_size]();
+  fout.write(kReserveBuffer, sizeof(char) * model_file_reserve_size);
+  delete[] kReserveBuffer;
+
+  fout.write(tensor_number_mark.c_str(),
+             sizeof(char) * tensor_number_mark.size());
+  size_t tensor_number = map_tensor.size();
+  fout.write(reinterpret_cast<const char*>(&tensor_number),
+             sizeof(tensor_number));
+
+  for (auto& itera : map_tensor) {
+    // first save the tensor name
+    fout.write(tensor_name_mark.c_str(),
+               sizeof(char) * tensor_name_mark.size());
+    size_t name_length = itera.first.size();
+    fout.write(reinterpret_cast<const char*>(&name_length),
+               sizeof(name_length));
+    fout.write(itera.first.c_str(), sizeof(char) * name_length);
+    // write tensor version
+    constexpr uint32_t version = 0;
+    fout.write(reinterpret_cast<const char*>(&version), sizeof(version));
+
+    // the 2nd field, tensor description
+    // int32_t  size
+    // void*    protobuf message
+    auto tensor = itera.second;
+
+    proto::VarType::TensorDesc desc;
+    desc.set_data_type(tensor->type());
+    auto dims = framework::vectorize(tensor->dims());
+    auto* pb_dims = desc.mutable_dims();
+    pb_dims->Resize(static_cast<int>(dims.size()), 0);
+    std::copy(dims.begin(), dims.end(), pb_dims->begin());
+    int32_t size = desc.ByteSize();
+    fout.write(reinterpret_cast<const char*>(&size), sizeof(size));
+    auto out = desc.SerializeAsString();
+    fout.write(out.data(), size);
+
+    // save tensor
+    uint64_t data_size =
+        tensor->numel() * framework::SizeOfType(tensor->type());
+    auto* data_ptr = tensor->data<void>();
+    if (platform::is_gpu_place(tensor->place())) {
+#ifdef PADDLE_WITH_CUDA
+      framework::Tensor temp;
+      TensorCopySync(*tensor, platform::CPUPlace(), &temp);
+      data_ptr = temp.data<void>();
+#else
+      PADDLE_THROW(
+          "Tensor is on a CUDA device, but paddle was not compiled with CUDA; "
+          "this should not happen");
+#endif
+    }
+    fout.write(static_cast<const char*>(data_ptr),
+               static_cast<std::streamsize>(data_size));
+  }
+
+  if (!fout) {
+    PADDLE_THROW("Model save failed, error while writing data to model file [%s]",
+                 file_name);
+  }
+
+  fout.close();
+
+  return true;
+}
+
+bool LoadTensorFromDisk(
+    const std::string& file_name,
+    std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
+  std::ifstream fin(file_name, std::ios::binary);
+
+  if (!fin) {
+    PADDLE_THROW("File open error. Can not open model file [%s]", file_name);
+  }
+
+  ReadReserveBuffer(fin);
+
+  size_t tensor_number = ReadTensorNumber(fin);
+
+  for (size_t i = 0; i < tensor_number; ++i) {
+    std::string str_tensor_name = ReadTensorName(fin);
+
+    std::shared_ptr<Tensor> tensor_temp(new Tensor());
+    uint32_t version;
+    fin.read(reinterpret_cast<char*>(&version), sizeof(version));
+    CheckInStreamState(fin, sizeof(version));
+    PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
+    proto::VarType::TensorDesc desc;
+    {
+      // int32_t size
+      // proto buffer
+      int32_t size;
+      fin.read(reinterpret_cast<char*>(&size), sizeof(size));
+      CheckInStreamState(fin, sizeof(size));
+      std::unique_ptr<char[]> buf(new char[size]);
+      fin.read(reinterpret_cast<char*>(buf.get()), size);
+      CheckInStreamState(fin, sizeof(size));
+      PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
+                     "Cannot parse tensor desc");
+    }
+
+    {  // read tensor
+      std::vector<int64_t> dims;
+      dims.reserve(static_cast<size_t>(desc.dims().size()));
+      std::copy(desc.dims().begin(), desc.dims().end(),
+                std::back_inserter(dims));
+      auto new_dim = framework::make_ddim(dims);
+      tensor_temp->Resize(new_dim);
+      void* buf;
+      framework::VisitDataType(desc.data_type(),
+                               DeserializedDataFunctor(&buf, tensor_temp.get(),
+                                                       platform::CPUPlace()));
+      size_t size =
+          tensor_temp->numel() * framework::SizeOfType(desc.data_type());
+
+      fin.read(reinterpret_cast<char*>(buf), size);
+      CheckInStreamState(fin, size);
+    }
+
+    (*map_tensor)[str_tensor_name] = tensor_temp;
+  }
+
+  return true;
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/save_load_util.h b/paddle/fluid/framework/save_load_util.h
new file mode 100644
index 00000000000..6b50c93ddbd
--- /dev/null
+++ b/paddle/fluid/framework/save_load_util.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/imperative/type_defs.h"
+
+namespace paddle {
+namespace framework {
+
+bool SaveStaticNameListToDisk(
+    const std::string& file_name,
+    const std::vector<std::string>& vec_tensor_name_list, const Scope& scope);
+
+bool LoadStaticNameListFromDisk(
+    const std::string& file_name,
+    const std::vector<std::string>& vec_tensor_name_list, const Scope& scope);
+
+bool SaveDygraphVarBaseListToDisk(
+    const std::string& file_name,
+    const std::vector<std::shared_ptr<imperative::VarBase>>& vec_var_base_list);
+
+const std::vector<std::shared_ptr<imperative::VarBase>>
+LoadDygraphVarBaseListFromDisk(const std::string& file_name);
+
+bool SaveTensorToDisk(const std::string& file_name,
+                      const std::map<std::string, Tensor*>& map_tensor);
+
+bool LoadTensorFromDisk(
+    const std::string& file_name,
+    std::map<std::string, std::shared_ptr<Tensor>>* map_tensor);
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/save_load_util_test.cc b/paddle/fluid/framework/save_load_util_test.cc
new file mode 100644
index 00000000000..4a54e2d4668
--- /dev/null
+++ b/paddle/fluid/framework/save_load_util_test.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <cstdlib>
+#include <ctime>
+#include <map>
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/save_load_util.h"
+#include "paddle/fluid/platform/macros.h"
+
+namespace paddle {
+namespace framework {
+TEST(test_save_load_util, test_save_load) {
+  srand(time(NULL));
+  auto cpu_place = platform::CPUPlace();
+  Tensor tensor1;
+  tensor1.Resize({1000, 1000});
+  auto src_data_1 = tensor1.mutable_data<float>(cpu_place);
+  Tensor tensor2;
+  tensor2.Resize({5000, 1000});
+  auto src_data_2 = tensor2.mutable_data<float>(cpu_place);
+
+  for (int64_t i = 0; i < tensor1.numel(); ++i) {
+    float temp = (rand() % 10000) * 1.0 / 50000 - 1.0;  // NOLINT
+
+    src_data_1[i] = temp;
+  }
+
+  for (int64_t i = 0; i < tensor2.numel(); ++i) {
+    float temp = (rand() % 10000) * 1.0 / 50000 - 1.0;  // NOLINT
+
+    src_data_2[i] = temp;
+  }
+
+  std::map<std::string, Tensor*> map_tensor;
+  map_tensor["t1"] = &tensor1;
+  map_tensor["t2"] = &tensor2;
+
+  SaveTensorToDisk("test_1", map_tensor);
+
+  std::map<std::string, std::shared_ptr<Tensor>> load_map_tensor;
+
+  LoadTensorFromDisk("test_1", &load_map_tensor);
+
+  ASSERT_TRUE(load_map_tensor.find("t1") != load_map_tensor.end());
+  ASSERT_TRUE(load_map_tensor.find("t2") != load_map_tensor.end());
+
+  auto new_tensor_1 = load_map_tensor["t1"];
+  auto new_tensor_2 = load_map_tensor["t2"];
+
+  float* ptr_1 = tensor1.data<float>();
+  float* ptr_1_new = new_tensor_1->data<float>();
+
+  for (int64_t i = 0; i < tensor1.numel(); ++i) {
+    ASSERT_EQ(ptr_1[i], ptr_1_new[i]);
+  }
+
+  float* ptr_2 = tensor2.data<float>();
+  float* ptr_2_new = new_tensor_2->data<float>();
+
+  for (int64_t i = 0; i < tensor2.numel(); ++i) {
+    ASSERT_EQ(ptr_2[i], ptr_2_new[i]);
+  }
+}
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index cb3493b62a3..bd78027f4b7 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper nccl_wrapper prune
     feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
-    analysis_predictor imperative_profiler nccl_context imperative_flag)
+    analysis_predictor imperative_profiler nccl_context imperative_flag save_load_util)
 
 if(WITH_PYTHON)
   list(APPEND PYBIND_DEPS py_func_op)
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index 68d01eb4a2c..cdbe2c15bff 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -148,6 +148,7 @@ void BindVarDsec(pybind11::module *m) {
       .def("set_name", &pd::VarDesc::SetName)
       .def("set_shape", &pd::VarDesc::SetShape)
       .def("set_shapes", &pd::VarDesc::SetShapes)
+      .def("get_shape", &pd::VarDesc::GetShape)
       .def("set_dtype", &pd::VarDesc::SetDataType)
       .def("set_dtypes", &pd::VarDesc::SetDataTypes)
       .def("shape", &pd::VarDesc::GetShape,
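Note for reviewers: the pybind changes below expose the C++ savers as `core._save_static_dict` / `core._load_static_dict` (a file name, a list of objects carrying a `name` attribute, and a scope) plus `core._save_dygraph_dict` / `core._load_dygraph_dict`. They are private plumbing behind `fluid.save`/`fluid.load` and `save_dygraph`/`load_dygraph`. A hedged sketch of how the static pair is driven (private API; assumes the startup program has already initialized the parameters in the global scope, and the parameter filter is illustrative):

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid import core
    from paddle.fluid.executor import global_scope

    main_prog = fluid.default_main_program()
    # Collect the variables worth persisting; here simply the parameters.
    param_list = [v for v in main_prog.list_vars()
                  if isinstance(v, fluid.framework.Parameter)]

    # The binding reads each element's `name` attribute and looks the
    # variable up in the given scope.
    core._save_static_dict("./static_model", param_list, global_scope())
    core._load_static_dict("./static_model", param_list, global_scope())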
*/ #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" +#include "paddle/fluid/framework/save_load_util.h" #include "paddle/fluid/framework/scope_pool.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/trainer.h" #include "paddle/fluid/framework/version.h" +#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/py_func_op.h" @@ -153,6 +155,88 @@ static inline int PlaceIndex(const PlaceType &p) { return static_cast(paddle::platform::Place(p).which()); } +static PyObject *GetPythonAttribute(PyObject *obj, const char *attr_name) { + // NOTE(zjl): PyObject_GetAttrString would return nullptr when attr_name + // is not inside obj, but it would also set the error flag of Python. + // If the error flag is set in C++, C++ code would not raise Exception, + // but Python would raise Exception once C++ call ends. + // To avoid unexpected Exception raised in Python, we check whether + // attribute exists before calling PyObject_GetAttrString. + // + // Caution: PyObject_GetAttrString would increase reference count of PyObject. + // Developer should call Py_DECREF manually after the attribute is not used. + if (PyObject_HasAttrString(obj, attr_name)) { + return PyObject_GetAttrString(obj, attr_name); + } else { + return nullptr; + } +} + +template +static T PyObjectCast(PyObject *obj) { + try { + return py::cast(py::handle(obj)); + } catch (py::cast_error &) { + PADDLE_THROW("Python object is not type of %s", typeid(T).name()); + } +} + +using PyNameVarBaseMap = std::unordered_map; + +static std::vector> GetVarBaseList( + const PyNameVarBaseMap &state_dict) { + std::vector> vec_res; + vec_res.reserve(state_dict.size()); + + for (auto ¶ : state_dict) { + PyObject *py_obj = para.second.ptr(); + if (!py_obj || py_obj == Py_None) { + PADDLE_THROW("Save parameter [%s] is None", para.first); + } + + const char *kIVarField = "_ivar"; + PyObject *py_ivar = GetPythonAttribute(py_obj, kIVarField); + PADDLE_ENFORCE_NOT_NULL(py_ivar, "Can not find ivar in Variable"); + + vec_res.emplace_back( + PyObjectCast>(py_ivar)); + Py_DECREF(py_ivar); + } + + return vec_res; +} + +static std::vector inline GetNameList( + const py::handle &py_handle) { + std::vector vec_res; + + PyObject *py_obj = py_handle.ptr(); // get underlying PyObject + // Python None is not nullptr in C++! 
+ if (!py_obj || py_obj == Py_None) { + PADDLE_THROW("Save parameter list is None"); + } + + if (PyList_Check(py_obj)) { + size_t len = PyList_GET_SIZE(py_obj); + + vec_res.reserve(len); + + const char *kNameField = "name"; + + for (size_t i = 0; i < len; ++i) { + PyObject *py_name = + PyObject_GetAttrString(PyList_GET_ITEM(py_obj, i), kNameField); + PADDLE_ENFORCE_NOT_NULL(py_name); + vec_res.emplace_back(PyObjectCast(py_name)); + Py_DECREF(py_name); + } + } else { + PADDLE_THROW("Set parameter should be a list"); + } + + return vec_res; +} + #ifdef PADDLE_WITH_AVX PYBIND11_MODULE(core_avx, m) { #else @@ -174,6 +258,39 @@ PYBIND11_MODULE(core_noavx, m) { m.def("set_num_threads", &platform::SetNumThreads); + m.def("_save_static_dict", + [](const std::string &str_file_name, const py::handle &vec_var_list, + const Scope &scope) { + std::vector vec_name_list = GetNameList(vec_var_list); + SaveStaticNameListToDisk(str_file_name, vec_name_list, scope); + }); + + m.def("_load_static_dict", + [](const std::string &str_file_name, const py::handle &vec_var_list, + const Scope &scope) { + std::vector vec_name_list = GetNameList(vec_var_list); + LoadStaticNameListFromDisk(str_file_name, vec_name_list, scope); + }); + + m.def("_save_dygraph_dict", [](const std::string &str_file_name, + const PyNameVarBaseMap &state_dict) { + auto vec_var_base_list = GetVarBaseList(state_dict); + + SaveDygraphVarBaseListToDisk(str_file_name, vec_var_base_list); + }); + + m.def("_load_dygraph_dict", [](const std::string &str_file_name) { + auto load_tensor = LoadDygraphVarBaseListFromDisk(str_file_name); + + std::unordered_map> + map_output; + + for (size_t i = 0; i < load_tensor.size(); ++i) { + map_output.emplace(load_tensor[i]->Name(), load_tensor[i]); + } + + return map_output; + }); m.def("save_op_compatible_info", [](framework::ProgramDesc &desc) { framework::OpCompatibleMap op_compatible_map; op_compatible_map.InitOpCompatibleMap(); @@ -373,7 +490,8 @@ PYBIND11_MODULE(core_noavx, m) { }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) // We implement offset based LOD in C++ while we use length based with - // Python API. So we changed set_lod to set_recursive_sequence_lengths to + // Python API. So we changed set_lod to set_recursive_sequence_lengths + // to // avoid misuse. // The discussion is here: // https://github.com/PaddlePaddle/Paddle/issues/10855 @@ -1707,7 +1825,8 @@ All parameter, weight, gradient are variables in Paddle. self.memory_optimize_ = (py_obj == Py_True); } else { PADDLE_THROW( - "BuildStrategy.memory_optimize must be None, False or True"); + "BuildStrategy.memory_optimize must be None, False or " + "True"); } }, R"DOC(The type is BOOL or None, memory opitimize aims to save total memory diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 5674cd08b71..527a7c45cb1 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -86,6 +86,8 @@ from paddle.fluid.layers.math_op_patch import monkey_patch_variable from . 
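Note for reviewers: with the `__init__.py` change below, the new entry points become importable as `fluid.save`, `fluid.load`, `fluid.save_dygraph` and `fluid.load_dygraph`. A minimal static-graph round trip matching the `(program, model_path)` signature recorded in API.spec (the network and the "./my_model" path are illustrative):

.. code-block:: python

    import paddle.fluid as fluid

    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.layers.data(name="x", shape=[10], dtype="float32")
        hidden = fluid.layers.fc(input=x, size=10)
        loss = fluid.layers.reduce_mean(hidden)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)  # parameters must be initialized before saving

    fluid.save(main_prog, "./my_model")  # persist the program's parameters
    fluid.load(main_prog, "./my_model")  # read them back into the scope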
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 5674cd08b71..527a7c45cb1 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -86,6 +86,8 @@ from paddle.fluid.layers.math_op_patch import monkey_patch_variable
 from . import install_check
 from .dygraph.nn import *
 from .dygraph.layers import *
+from .io import save, load
+from .dygraph.checkpoint import save_dygraph, load_dygraph
 
 Tensor = LoDTensor
@@ -122,6 +124,8 @@ __all__ = framework.__all__ + executor.__all__ + \
     'unique_name',
     'Scope',
     'install_check',
+    'save',
+    'load',
 ]
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
index accffc17448..e241c66501d 100644
--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -177,6 +177,10 @@ if avx_supported():
         from .core_avx import _is_dygraph_debug_enabled
         from .core_avx import _dygraph_debug_level
         from .core_avx import _set_paddle_lib_path
+        from .core_avx import _save_static_dict
+        from .core_avx import _load_static_dict
+        from .core_avx import _save_dygraph_dict
+        from .core_avx import _load_dygraph_dict
     except Exception as e:
         if has_avx_core:
             raise e
@@ -206,6 +210,10 @@ if load_noavx:
         from .core_noavx import _is_dygraph_debug_enabled
         from .core_noavx import _dygraph_debug_level
         from .core_noavx import _set_paddle_lib_path
+        from .core_noavx import _save_static_dict
+        from .core_noavx import _load_static_dict
+        from .core_noavx import _save_dygraph_dict
+        from .core_noavx import _load_dygraph_dict
     except Exception as e:
         if has_noavx_core:
             sys.stderr.write(
diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index 631605bd0b1..6f615dd191c 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -16,224 +16,138 @@ from __future__ import print_function
 
 import os
 import collections
-from ..framework import Variable, default_main_program
+from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter
 import pickle
 from . import learning_rate_scheduler
 import warnings
+from .. import core
 
-__all__ = ['save_persistables', 'load_persistables']
+__all__ = [
+    'save_dygraph',
+    'load_dygraph',
+]
 
-def save_persistables(model_dict, dirname='save_dir', optimizers=None):
-    """
-    This function filters out all variables in layer.parameters from the give `layer`, and optimizer's learning rate decay.
-    And then trys to save these variables to the folder `dirname`.
-
-    Use the `dirname` to specify the folder where persistable variables were
-    saved.
-
+@dygraph_only
+def save_dygraph(state_dict, model_path):
+    '''
+    Save a Layer's state_dict to disk. This will generate a file with the suffix ".pdparams"
+    (or ".pdopt" when the dict holds optimizer state).
+
+    The state_dict is obtained from the Layer's state_dict() function.
+
     Args:
-        model_dict(dict of Parameters): The parameters will
-                              be saved. If it is None, nothing
-                              will be deal.
-        dirname(str): The directory path.
-        optimizers(fluid.Optimizer|list(fluid.Optimizer)|None): The optimizers to be saved
+        state_dict(dict) : The state dict to be saved.
+        model_path(str) : the file prefix to save the state_dict. The format is "dirname/file_prefix". If file_prefix is an empty string, an exception will be raised.
 
     Returns:
         None
 
     Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+
+            with fluid.dygraph.guard():
+                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+
+                state_dict = emb.state_dict()
+                fluid.save_dygraph( state_dict, "paddle_dy")
+
+                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
+
+                state_dict = adam.state_dict()
+                fluid.save_dygraph( state_dict, "paddle_dy")
+
+    '''
+
+    base_name = os.path.basename(model_path)
+    assert base_name != "", "model_path MUST be in the format of dirname/filename [dirname\\filename on Windows]; the filename is currently an empty string"
+
+    suffix = ".pdparams"
+    assert len(state_dict) > 0, "state_dict is empty, no need to save"
+
+    for k, v in state_dict.items():
+        if not isinstance(v, Parameter):
+            suffix = ".pdopt"
+            break
+
+    core._save_dygraph_dict(model_path + suffix, state_dict)
+
+
+@dygraph_only
+def load_dygraph(model_path):
+    '''
+    Load a parameter state_dict from disk.
+
+    Args:
+        model_path(str) : the file prefix that stores the state_dict (the path should not contain the suffix '.pdparams')
+
+    Returns:
+        para_dict, opti_dict (tuple of dict) : the parameter dict and the optimizer dict; the latter is None when no '.pdopt' file exists
+
+    Examples:
         .. code-block:: python
-
-            ptb_model = PtbModel(
-                hidden_size=hidden_size,
-                vocab_size=vocab_size,
-                num_layers=num_layers,
-                num_steps=num_steps,
-                init_scale=init_scale)
-            sgd = fluid.optimizer.SGD(learning_rate=0.01)
-            x_data = np.arange(12).reshape(4, 3).astype('int64')
-            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-            x_data = x_data.reshape((-1, num_steps, 1))
-            y_data = y_data.reshape((-1, 1))
-            init_hidden_data = np.zeros(
-                (num_layers, batch_size, hidden_size), dtype='float32')
-            init_cell_data = np.zeros(
-                (num_layers, batch_size, hidden_size), dtype='float32')
-            x = to_variable(x_data)
-            y = to_variable(y_data)
-            init_hidden = to_variable(init_hidden_data)
-            init_cell = to_variable(init_cell_data)
-            dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
-                                                        init_cell)
-            dy_loss.backward()
-            sgd.minimize(dy_loss)
-            ptb_model.clear_gradient()
-            param_path = "./my_paddle_model"
-            fluid.dygraph.save_persistables(ptb_model.state_dict(), dirname=param_path, sgd)
-    """
-    if isinstance(model_dict, collections.OrderedDict):
-        _save_var_to_file(model_dict, optimizers, dirname, None)
-
-
-def load_persistables(dirname='save_dir'):
-    """
-    This function trys to load persistable variables and optimizer's learning rate decay from the folder `dirname`.
-    And return the restored values in a dictionary way, respectively.
-
-    Use the `dirname` to specify the folder where persistable variables were
-    saved.
+            import paddle.fluid as fluid
+
+            with fluid.dygraph.guard():
+                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+
+                state_dict = emb.state_dict()
+                fluid.save_dygraph( state_dict, "paddle_dy")
+
+                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
+                state_dict = adam.state_dict()
+                fluid.save_dygraph( state_dict, "paddle_dy")
+
+                para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
+
+    '''
+
+    params_file_path = model_path + ".pdparams"
+    if not os.path.exists(params_file_path):
+        raise RuntimeError("Parameter file [ {} ] does not exist".format(
+            params_file_path))
+
+    para_dict = core._load_dygraph_dict(params_file_path)
+
+    opti_dict = None
+    opti_file_path = model_path + ".pdopt"
+    if os.path.exists(opti_file_path):
+        opti_dict = core._load_dygraph_dict(opti_file_path)
+
+    return para_dict, opti_dict
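Note for reviewers: `load_dygraph` above returns a `(para_state_dict, opti_state_dict)` pair, with the second element `None` when no ".pdopt" file is present. A typical restore therefore looks like this sketch (layer/optimizer construction and the "paddle_dy" prefix are illustrative; the checkpoint is assumed to exist):

.. code-block:: python

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding("emb", [10, 10])
        adam = fluid.optimizer.Adam(learning_rate=0.001)

        para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy")
        emb.set_dict(para_state_dict)        # restore parameters
        if opti_state_dict is not None:      # ".pdopt" may be absent
            adam.set_dict(opti_state_dict)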
+
+
+@dygraph_only
+def load_optimizer(model_path):
+    '''
+    Load an optimizer state_dict from disk.
 
     Args:
-        dirname(str): The directory path. default is save_dir
+        model_path(str) : the file prefix that stores the state_dict (the path should not contain the suffix '.pdparams')
 
     Returns:
-        layer_dict: The parameter-dict resumed from file
-        optimizer: The optimizer
+        state_dict(dict) : the dict that stores the optimizer state
 
     Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+
+            with fluid.dygraph.guard():
+                adam = fluid.optimizer.Adam(0.001)
+
+                state_dict = adam.state_dict()
+                fluid.save_dygraph( state_dict, "opt_adam")
+
+                fluid.load_optimizer( "opt_adam")
+
+    '''
 
-        .. code-block:: python
-
-            my_layer = layer(fluid.Layer)
-            param_path = "./my_paddle_model"
-            sgd = SGDOptimizer(learning_rate=1e-3)
-            param_dict, optimizer_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
-            param_1 = param_dict['PtbModel_0.w_1']
-            sgd.load(optimizer_dict)
-
-    """
-    return _load_var_from_file(dirname)
-
-
-def _save_var_to_file(stat_dict, optimizers, file_dir, file_name):
-    save_block = default_main_program().global_block()
-    save_var_map = {}
-    for var_key, each_var in stat_dict.items():
-        save_var_map[each_var.name] = each_var
-        if file_name is None:
-            save_block.append_op(
-                type='save',
-                inputs={'X': [each_var]},
-                outputs={},
-                attrs={
-                    'file_path': os.path.join(file_dir,
-                                              os.path.normpath(each_var.name))
-                })
-
-    if optimizers is not None:
-        if isinstance(optimizers, (list, tuple)):
-            optimizers = optimizers
-        else:
-            optimizers = [optimizers]
-        if os.path.exists(
-                os.path.join(file_dir, os.path.normpath("optimizers"))):
-            pass
-        else:
-            os.mkdir(os.path.join(file_dir, os.path.normpath("optimizers")))
-        for optimizer in optimizers:
-            if isinstance(optimizer._learning_rate,
-                          learning_rate_scheduler.LearningRateDecay):
-                try:
-                    f = open(
-                        os.path.join(file_dir, "optimizers",
-                                     os.path.normpath(str(optimizer._name))),
-                        "wb")
-                    pickle.dump(optimizer._learning_rate, f, 2)
-                    f.close()
-                except ():
-                    raise IOError("Can't load %s",
-                                  os.path.join(
-                                      file_dir, "optimizers",
-                                      os.path.normpath(str(optimizer._name))))
-            else:
-                warnings.warn(
-                    "Optimizer not saved, Only optimizer with 'LearningRateDecay' under DyGraph mode need to be saved"
-                )
-    else:
-        pass
-
-    if file_name is not None:
-        save_var_list = []
-        for name in sorted(save_var_map.keys()):
-            save_var_list.append(save_var_map[name])
-
-        save_block.append_op(
-            type='save_combine',
-            inputs={'X': save_var_list},
-            outputs={},
-            attrs={
-                'file_path': os.path.join(file_dir, os.path.normpath(file_name))
-            })
-
-
-def _load_var_from_file(file_dir):
-    if not os.path.exists(file_dir):
-        raise IOError("{} not exist".format(file_dir))
-
-    def walk_filename(file_dir):
-        base_path = os.path.join(file_dir)
-        var_name_list = []
-        if os.path.exists(base_path):
-            for dirpath, dirnames, filenames in os.walk(base_path):
-                if "optimizers" in dirpath:
-                    continue
-                pt = dirpath.replace(base_path, "", 1)
-                if pt.startswith("/") or pt.startswith("\\"):
-                    pt = pt[1:]
-                for fth_name in filenames:
-                    if fth_name[0] != '.':
-                        name_path = os.path.join(pt, fth_name)
-                        if "\\" in name_path:
-                            name_path = name_path.replace("\\", "/")
-                        var_name_list.append(name_path)
-
-        return var_name_list
-
-    load_block = default_main_program().global_block()
-    load_var_map = {}
-    load_optimizer_map = {}
-    file_var_list = walk_filename(file_dir)
-    for var_name in file_var_list:
-        new_var = Variable(block=load_block, name=var_name)
-        load_block.append_op(
-            type='load',
-            inputs={},
-            outputs={'Out': [new_var]},
-            attrs={
-                'file_path': os.path.join(file_dir,
                                          os.path.normpath(new_var.name))
-            })
-
load_var_map[new_var.name] = new_var - opt_path = os.path.join(file_dir, "optimizers") - for _, _, optimizers in os.walk(opt_path): - for optimizer in optimizers: - try: - f = open(os.path.join(opt_path, optimizer), "rb") - load_optimizer_map[optimizer] = pickle.load(f) - f.close() - except IOError: - raise IOError("Can't load %s", - os.path.join( - file_dir, "optimizers", - os.path.normpath(str(optimizer._name)))) - if len(load_optimizer_map) == 0: - print( - "No optimizer loaded. If you didn't save optimizer, please ignore this. The program can still work with new optimizer. " - ) - pass - - return load_var_map, load_optimizer_map - - -def _clone_var_in_block_(block, var): - assert isinstance(var, Variable) - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=0, - persistable=True) + assert in_dygraph_mode(), "save_optimizer only work in dygraph mode" + opt_file_path = model_path + ".pdopt" + if not os.path.exists(opt_file_path): + raise RuntimeError("Optimizer file [ {} ] not exists".format( + opt_file_path)) + return core._load_dygraph_dict(opt_file_path) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index e53f0dd2b66..a08afb9f1bb 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -24,6 +24,7 @@ from paddle.fluid import core from .layer_object_helper import LayerObjectHelper from paddle.fluid import framework from ..param_attr import ParamAttr +from paddle.fluid.framework import Variable __all__ = ['Layer'] @@ -198,11 +199,11 @@ class Layer(core.Layer): """ assert isinstance(parameter, framework.Parameter) - if parameter.name in self._loaddict_holder: - var = parameter._ivar.value() - tensor = var.get_tensor() - tensor.set(self._loaddict_holder[parameter.name].numpy(), - framework._current_expected_place()) + if len(self._loaddict_holder) > 0: + assert parameter.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format( + parameter.name) + + parameter.set_value(self._loaddict_holder[parameter.name]) self._parameters[name] = parameter return parameter @@ -223,11 +224,12 @@ class Layer(core.Layer): if params is None: raise ValueError( "super(YourLayer, self).__init__() should be called first") - if value.name in self._loaddict_holder: - var = value._ivar.value() - tensor = var.get_tensor() - tensor.set(self._loaddict_holder[value.name].numpy(), - framework._current_expected_place()) + if len(self._loaddict_holder) > 0: + assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format( + value.name) + + value.set_value(self._loaddict_holder[value.name]) + if name in params: # remove unused param in tracer if framework._dygraph_tracer_ is not None: @@ -252,6 +254,27 @@ class Layer(core.Layer): object.__delattr__(self, name) def state_dict(self, destination=None, include_sublayers=True): + ''' + Get all parameter of current and sub-layers. And set all the parameters into a dict + + Args: + destination(dict|optical) : If provide, all the parameter will set to this dict . Defaul is None + include_sublayers(bool) : If true, also include the parameters from sublayers. + + Retruns: + state_dict(dict) : dict contains all the parameters + + Examples: + .. 
 
+    def set_dict(self, stat_dict, include_sublayers=True):
+        '''
+        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict
+
+        Args:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool) : If True, also include the parameters from sublayers.
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    emb = fluid.dygraph.Embedding( "emb", [10, 10])
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+
+                    emb.set_dict( para_state_dict )
+
+        '''
+        self.load_dict(stat_dict, include_sublayers=include_sublayers)
+
     def load_dict(self, stat_dict, include_sublayers=True):
+        '''
+        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict
+
+        This API will be deprecated. Please use set_dict instead
+
+        Args:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool) : If True, also include the parameters from sublayers.
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    emb = fluid.dygraph.Embedding( "emb", [10, 10])
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+
+                    emb.load_dict( para_state_dict )
+
+        '''
+
         self._loaddict_holder = stat_dict
         for name, item in self.__dict__.get('_parameters', None).items():
             if item.name in stat_dict:
-                var = item._ivar.value()
-                tensor = var.get_tensor()
-                tensor.set(stat_dict[item.name].numpy(),
-                           framework._current_expected_place())
+                item.set_value(stat_dict[item.name])
+            else:
+                raise RuntimeError(
+                    "Parameter not found, Can't find [ {} ] in stat_dict".
+                    format(item.name))
 
         if include_sublayers:
             for layer_name, layer_item in self._sub_layers.items():
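Unlike the old tensor-copy path, which silently skipped missing names, the rewritten load_dict now fails loudly on a missing key. A sketch of the new contract (illustrative only):

.. code-block:: python

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding("emb", [10, 10])
        state_dict = emb.state_dict()
        emb.set_dict(state_dict)      # full dict: every parameter is reset in place

        state_dict.popitem()          # simulate a partial checkpoint
        try:
            emb.set_dict(state_dict)  # now raises RuntimeError
        except RuntimeError as e:
            print(e)                  # Parameter not found, Can't find [ ... ] in stat_dict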
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 622eb247c7a..034c9dd482f 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -150,6 +150,19 @@ def is_compiled_with_cuda():
     return core.is_compiled_with_cuda()
 
 
+def _var_base_to_np(var_base):
+    """
+    convert VarBase to numpy
+
+    Args:
+        var_base(VarBase) : the VarBase to convert
+    Returns (np.ndarray): the np.ndarray containing the value of the VarBase
+
+    """
+    var = var_base._copy_to(core.CPUPlace(), True)
+    return np.array(var.value().get_tensor())
+
+
 def cuda_places(device_ids=None):
     """
     **Note**:
@@ -491,6 +504,7 @@ class Variable(object):
                  stop_gradient=False,
                  is_data=False,
                  need_check_feed=False,
+                 belong_to_optimizer=False,
                  **kwargs):
         self.block = block
         if name is None:
@@ -500,6 +514,8 @@ class Variable(object):
         if not isinstance(dtype, core.VarDesc.VarType):
             dtype = convert_np_dtype_to_dtype_(dtype)
 
+        self.belong_to_optimizer = belong_to_optimizer
+
         if in_dygraph_mode():
             # record vars in tracer rather than blocks
             self._ivar = kwargs.get("ivar", None)
@@ -700,15 +716,25 @@ class Variable(object):
                 out = fc(t)  # call with different weight
 
         """
-        assert isinstance(value, (Variable, np.ndarray))
-        if list(value.shape) != list(self.shape):
-            raise ValueError(
-                "The shape of the new value must be the same as that of the original Variable."
-            )
-        self_tensor = self._ivar.value().get_tensor()
+        assert isinstance(value, (Variable, np.ndarray, core.VarBase)), \
+            "Variable set_value function, argument type only supports Variable, numpy ndarray and VarBase"
+
+        value_np = value
         if isinstance(value, Variable):
-            value = value._ivar.value().get_tensor().__array__()
-        self_tensor.set(value, _current_expected_place())
+            value_np = value.numpy()
+        elif isinstance(value, core.VarBase):
+            value_np = _var_base_to_np(value)
+        self_tensor = self._ivar.value().get_tensor()
+
+        self_tensor_np = np.array(self_tensor)
+
+        assert self_tensor_np.shape == value_np.shape, \
+            "Variable shape does not match: Variable [ {} ] needs a tensor with shape {} but the tensor to set has shape {}".format( self._ivar.name, self_tensor_np.shape, value_np.shape)
+
+        assert self_tensor_np.dtype == value_np.dtype, \
+            "Variable dtype does not match: Variable [ {} ] needs a tensor with dtype {} but the tensor to set has dtype {}".format( self._ivar.name, self_tensor_np.dtype, value_np.dtype)
+
+        self_tensor.set(value_np, _current_expected_place())
 
     @dygraph_only
     def backward(self, backward_strategy=None):
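set_value now validates shape and dtype with asserts (hence the ValueError-to-AssertionError switch in test_layers further down). A sketch, assuming an FC layer that has been called once so its weight exists:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable

    with fluid.dygraph.guard():
        fc = fluid.dygraph.FC("fc", 4)
        fc(to_variable(np.ones((2, 4), dtype="float32")))  # build parameters

        # same shape and dtype: accepted
        fc.weight.set_value(np.zeros(fc.weight.shape, dtype="float32"))

        try:
            fc.weight.set_value(np.zeros((1, 1), dtype="float32"))
        except AssertionError as e:
            print(e)  # shape mismatch is now an AssertionError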
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index c2511824556..a408d8ba607 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -27,7 +27,7 @@ import paddle
 import paddle.reader
 from paddle.reader import *
 from paddle.fluid import layers
-from paddle.fluid.executor import Executor
+from paddle.fluid.executor import Executor, global_scope
 from paddle.fluid.evaluator import Evaluator
 from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable, program_guard
 from paddle.fluid.compiler import CompiledProgram
@@ -41,7 +41,8 @@ batch = paddle.batch
 
 __all__ = [
     'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
-    'load_persistables', 'save_inference_model', 'load_inference_model', 'batch'
+    'load_persistables', 'save_inference_model', 'load_inference_model',
+    'batch', 'save', 'load'
 ] + reader.__all__ + paddle.reader.__all__
 
 _logger = get_logger(
@@ -94,6 +95,10 @@ def is_persistable(var):
     return var.persistable
 
 
+def is_belong_to_optimizer(var):
+    return var.belong_to_optimizer
+
+
 def _clone_var_in_block_(block, var):
     assert isinstance(var, Variable)
     if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR:
@@ -1439,3 +1444,96 @@ def _load_persistable_nodes(executor, dirname, graph):
         else:
             _logger.warn("Cannot find the var %s!!!" % (node.name()))
     load_vars(executor=executor, dirname=dirname, vars=var_list)
+
+
+def save(program, model_path):
+    """
+    This function saves parameters, optimizer information and the network description to model_path.
+
+    The parameters contain all the trainable Variables and will be saved to a file with the suffix ".pdparams".
+    The optimizer information contains all the variables used by the optimizer. For the Adam optimizer, this includes beta1, beta2, momentum etc. All the information will be saved to a file with the suffix ".pdopt". (If the optimizer has no variables that need to be saved, like SGD, the file will not be generated).
+    The network description is the description of the program. It's only used for deployment. The description will be saved to a file with the suffix ".pdmodel".
+
+    Args:
+        program(Program) : The program to be saved.
+        model_path(str): the file prefix to save the program. The format is "dirname/file_prefix". If file_prefix is an empty string, an exception will be raised.
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+
+            prog = fluid.default_main_program()
+            fluid.save( prog, "./temp")
+
+    """
+
+    base_name = os.path.basename(model_path)
+    assert base_name != "", \
+        "model_path MUST be in the format of dirname/filename [dirname\\filename on Windows], now filename is an empty string"
+
+    parameter_list = list(filter(is_parameter, program.list_vars()))
+    paddle.fluid.core._save_static_dict(model_path + ".pdparams",
+                                        parameter_list, global_scope())
+
+    optimizer_var_list = list(
+        filter(is_belong_to_optimizer, program.list_vars()))
+
+    paddle.fluid.core._save_static_dict(model_path + ".pdopt",
+                                        optimizer_var_list, global_scope())
+
+    main_program = program.clone()
+    program.desc.flush()
+    main_program.desc._set_version()
+    paddle.fluid.core.save_op_compatible_info(program.desc)
+
+    with open(model_path + ".pdmodel", "wb") as f:
+        f.write(program.desc.serialize_to_string())
+
+
+def load(program, model_path):
+    """
+    This function filters out the parameters and optimizer information from the program, and then gets the corresponding values from the file.
+    An exception will be thrown if the shape or dtype of a parameter does not match between the program and the loaded file.
+
+    NOTICE: This function MUST be called after running the startup program.
+
+    Args:
+        program: The program to be loaded
+        model_path: The file prefix storing the program
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+
+            prog = fluid.default_main_program()
+            fluid.save( prog, "./temp")
+
+            fluid.load( prog, "./temp")
+
+    """
+
+    parameter_file_name = model_path + ".pdparams"
+    assert os.path.exists(parameter_file_name), \
+        "Parameter file [{}] does not exist".format( parameter_file_name)
+
+    parameter_list = list(filter(is_parameter, program.list_vars()))
+    paddle.fluid.core._load_static_dict(parameter_file_name, parameter_list,
+                                        global_scope())
+
+    optimizer_var_list = list(
+        filter(is_belong_to_optimizer, program.list_vars()))
+
+    if len(optimizer_var_list) > 0:
+        opt_file_name = model_path + ".pdopt"
+        assert os.path.exists(opt_file_name), \
+            "Optimizer file [{}] does not exist".format( opt_file_name)
+        paddle.fluid.core._load_static_dict(opt_file_name, optimizer_var_list,
+                                            global_scope())
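End to end, the intended call pattern for the new static-graph pair is sketched below (the network is illustrative; the key point is that load must follow the startup program run):

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name="x", shape=[10], dtype="float32")
    y = fluid.layers.fc(x, size=10)
    loss = fluid.layers.reduce_mean(y)
    fluid.optimizer.Adam(learning_rate=0.001).minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())  # load() requires this to have run

    # writes model.pdparams, model.pdopt (Adam has accumulators) and model.pdmodel
    fluid.save(fluid.default_main_program(), "./checkpoint/model")
    fluid.load(fluid.default_main_program(), "./checkpoint/model")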
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 20ce7f53c90..461820f6b3e 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -8242,7 +8242,11 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
     if counter_name is None:
         counter_name = '@STEP_COUNTER@'
     counter, is_new_var = helper.create_or_get_global_variable(
-        name=counter_name, dtype='int64', shape=[1], persistable=True)
+        name=counter_name,
+        dtype='int64',
+        shape=[1],
+        persistable=True,
+        belong_to_optimizer=True)
     if is_new_var:
         helper.set_variable_initializer(
             counter, initializer=Constant(
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 0d84edc7b22..377cd2aa49c 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -32,6 +32,7 @@ from .layers import ops
 from .regularizer import append_regularization_ops
 from .dygraph import base as imperative_base
 from .dygraph.learning_rate_scheduler import LearningRateDecay
+from .framework import _var_base_to_np
 from paddle.fluid import core
 from paddle.fluid.layers import tensor
 from functools import reduce
@@ -95,90 +96,124 @@ class Optimizer(object):
         self._accumulators = defaultdict(lambda: dict())
         self.helper = None
         self._opti_name_list = []
+        self._accumulators_holder = {}
 
-    def load(self, stat_dict):
-        """
-        load optimizer with learning rate decay in dygraph mode
-        :return: None
+    @framework.dygraph_only
+    def state_dict(self):
+        '''
+        Get state dict information from the optimizer. It contains all the variables used by the optimizer. For the Adam optimizer, this includes beta1, beta2, momentum etc. If LearningRateDecay has been used, global_step will be included in the state dict.
+        If the optimizer has never been called (via the minimize function), the state_dict is empty.
 
-        Args:
-            stat_dict: the dict load by load_persistable method
+        Args: None
+        Return:
+            state_dict(dict) : dict containing all the variables used by the optimizer
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                adam = fluid.optimizer.Adam(0.001)
+                state_dict = adam.state_dict()
+
+        '''
+        state_dict = {}
+        for k, v in self._accumulators.items():
+            for para_name, var_tmp in v.items():
+                state_dict[var_tmp.name] = var_tmp
+        # include global step if lr decay is used
+        if isinstance(self._learning_rate, LearningRateDecay):
+            var_temp = Variable(None, name='global_step', dtype='int32')
+            tensor.fill_constant(
+                [1], "int32", self._learning_rate.step_num, out=var_temp)
+
+            state_dict['global_step'] = var_temp
+        return state_dict
 
+    @framework.dygraph_only
+    def set_dict(self, state_dict):
+        '''
+        Load the optimizer state dict. For the Adam optimizer, it contains beta1, beta2, momentum etc. If LearningRateDecay has been used, global_step will be changed.
+
+        Args:
+            state_dict(dict) : Dict containing all the Variables needed by the optimizer
+        Return:
+            None
+
+        Examples:
+            .. code-block:: python
 
-        .. code-block:: python
+                with fluid.dygraph.guard():
+                    emb = fluid.dygraph.Embedding( "emb", [10, 10])
 
-            from __future__ import print_function
-            import numpy as np
-            import paddle
-            import paddle.fluid as fluid
-            from paddle.fluid.optimizer import SGDOptimizer
-            from paddle.fluid.dygraph.nn import FC
-            from paddle.fluid.dygraph.base import to_variable
-
-            class MLP(fluid.Layer):
-                def __init__(self, name_scope):
-                    super(MLP, self).__init__(name_scope)
-
-                    self._fc1 = FC(self.full_name(), 10)
-                    self._fc2 = FC(self.full_name(), 10)
-
-                def forward(self, inputs):
-                    y = self._fc1(inputs)
-                    y = self._fc2(y)
-                    return y
-
-            with fluid.dygraph.guard():
-                mlp = MLP('mlp')
-                optimizer2 = SGDOptimizer(
-                    learning_rate=fluid.layers.natural_exp_decay(
-                        learning_rate=0.1,
-                        decay_steps=10000,
-                        decay_rate=0.5,
-                        staircase=True))
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
 
-                train_reader = paddle.batch(
-                    paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
-
-                for batch_id, data in enumerate(train_reader()):
-                    dy_x_data = np.array(
-                        [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-
-                    y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                        128, 1)
-
-                    img = to_variable(dy_x_data)
-                    label = to_variable(y_data)
-                    label._stop_gradient = True
-                    cost = mlp(img)
-                    avg_loss = fluid.layers.reduce_mean(cost)
-                    avg_loss.backward()
-                    optimizer.minimize(avg_loss)
-                    mlp.clear_gradients()
-                    fluid.dygraph.save_persistables(
-                        mlp.state_dict(), [optimizer, optimizer2], "save_dir_2")
-                    if batch_id == 2:
-                        break
-
-            with fluid.dygraph.guard():
-                mlp_load = MLP('mlp')
-                optimizer_load2 = SGDOptimizer(
-                    learning_rate=fluid.layers.natural_exp_decay(
-                        learning_rate=0.1,
-                        decay_steps=10000,
-                        decay_rate=0.5,
-                        staircase=True))
-                parameters, optimizers = fluid.dygraph.load_persistables(
-                    "save_dir_2")
-                mlp_load.load_dict(parameters)
-                optimizer_load2.load(optimizers)
-                self.assertTrue(optimizer2._learning_rate.__dict__ == optimizer_load2._learning_rate.__dict__)
+                    adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
+                    state_dict = adam.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
 
-        """
-        if framework.in_dygraph_mode():
-            self._learning_rate = stat_dict[self._name]
-        else:
-            raise TypeError("load can only be used under DyGraph mode")
+                    para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
+
+                    adam.set_dict( opti_state_dict )
+
+        '''
+
+        if isinstance(self._learning_rate, LearningRateDecay):
+            assert 'global_step' in state_dict, \
+                'Global step not in state dict: when Dygraph uses LearningRateDecay, global_step must be in state_dict'
+            global_step = state_dict['global_step']
+
+            if isinstance(global_step, core.VarBase):
+                step_np = global_step._copy_to(core.CPUPlace(), True)
+                step_np = np.array(step_np.value().get_tensor())
+                assert step_np.shape == (1,), \
+                    "global step shape should be (1,), but the shape is {}".format( step_np.shape )
+
+                self._learning_rate.step_num = int(step_np[0])
+            elif isinstance(global_step, Variable):
+                step_np = global_step.numpy()
+                assert step_np.shape == (1,), \
+                    "global step shape should be (1,), but the shape is {}".format( step_np.shape )
+                self._learning_rate.step_num = step_np[0]
+            elif isinstance(global_step, np.ndarray):
+                assert global_step.shape == (1,), \
+                    "global step shape should be (1,), but the shape is {}".format( global_step.shape )
+                self._learning_rate.step_num = global_step[0]
+            else:
+                raise RuntimeError(
+                    "Type not supported, value in state dict must be [VarBase, Variable, numpy], the type is ",
+                    type(global_step))
+
+        self._accumulators_holder = state_dict
+        for k, v in self._accumulators.items():
+            for para_name, var_tmp in v.items():
+                assert var_tmp.name in state_dict, \
+                    "optimizer variable {} not found".format( var_tmp.name )
+                var = var_tmp._ivar.value()
+                tensor = var.get_tensor()
+                model_np = np.array(tensor)
+
+                load_para = state_dict[var_tmp.name]
+
+                if isinstance(load_para, Variable):
+                    load_para_np = load_para.numpy()
+                elif isinstance(load_para, core.VarBase):
+                    load_para_np = _var_base_to_np(load_para)
+                elif isinstance(load_para, np.ndarray):
+                    load_para_np = load_para
+                else:
+                    raise RuntimeError("State dict type {} not supported".format(
+                        str(type(load_para))))
+
+                assert model_np.shape == load_para_np.shape, \
+                    "Parameter shape does not match: Dygraph Parameter [ {} ] needs a tensor with shape {} but the loaded tensor has shape {}".format(
+                        var_tmp.name, model_np.shape, load_para_np.shape)
+
+                assert model_np.dtype == load_para_np.dtype, \
+                    "Parameter dtype does not match: Dygraph Parameter [ {} ] needs a tensor with dtype {} but the loaded tensor has dtype {}".format(
+                        var_tmp.name, model_np.dtype, load_para_np.dtype)
+
+                tensor.set(load_para_np, framework._current_expected_place())
 
     def get_opti_var_name_list(self):
         return self._opti_name_list
@@ -315,9 +350,17 @@ class Optimizer(object):
                 persistable=True,
                 dtype=dtype or param.dtype,
                 type=param.type,
-                shape=shape)
+                shape=shape,
+                belong_to_optimizer=True)
             self.helper.set_variable_initializer(
                 var, initializer=Constant(value=float(fill_value)))
+
+            if framework.in_dygraph_mode():
+                if len(self._accumulators_holder) > 0:
+                    assert var_name in self._accumulators_holder, \
+                        "Optimizer set error, {} should be in state dict".format( var_name )
+
+                    var.set_value(self._accumulators_holder[var_name])
+
             self._accumulators[name][param.name] = var
         return var
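Putting state_dict/set_dict together, a round trip looks roughly like the sketch below. It assumes minimize has run so the Adam accumulators and global_step exist; otherwise set_dict parks the values in _accumulators_holder and _add_accumulator applies them lazily when the accumulators are created, as the hunk above shows.

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding("emb", [10, 10])
        adam = fluid.optimizer.Adam(
            learning_rate=fluid.layers.noam_decay(100, 10000))

        ids = to_variable(np.arange(12).reshape(4, 3, 1).astype("int64"))
        loss = fluid.layers.reduce_mean(emb(ids))
        loss.backward()
        adam.minimize(loss)  # creates accumulators; state_dict is now non-empty

        fluid.save_dygraph(adam.state_dict(), "ckpt/adam")
        _, opti_state_dict = fluid.load_dygraph("ckpt/adam")
        adam.set_dict(opti_state_dict)  # restores accumulators and global_step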
 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
deleted file mode 100644
index 609662cf988..00000000000
--- a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import unittest -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid import Conv2D, Pool2D, FC, core -from paddle.fluid.dygraph.base import to_variable - - -class SimpleImgConvPool(fluid.Layer): - def __init__(self, - name_scope, - num_filters, - filter_size, - pool_size, - pool_stride, - pool_padding=0, - pool_type='max', - global_pooling=False, - conv_stride=1, - conv_padding=0, - conv_dilation=1, - conv_groups=1, - act=None, - use_cudnn=False, - param_attr=None, - bias_attr=None): - super(SimpleImgConvPool, self).__init__(name_scope) - - self._conv2d = Conv2D( - self.full_name(), - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - self.full_name(), - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) - - def forward(self, inputs): - x = self._conv2d(inputs) - x = self._pool2d(x) - return x - - -class MNIST(fluid.Layer): - def __init__(self, name_scope): - super(MNIST, self).__init__(name_scope) - - self._simple_img_conv_pool_1 = SimpleImgConvPool( - self.full_name(), 20, 5, 2, 2, act="relu") - - self._simple_img_conv_pool_2 = SimpleImgConvPool( - self.full_name(), 50, 5, 2, 2, act="relu") - - pool_2_shape = 50 * 4 * 4 - SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = FC(self.full_name(), - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") - - def forward(self, inputs): - x = self._simple_img_conv_pool_1(inputs) - x = self._simple_img_conv_pool_2(x) - x = self._fc(x) - return x - - -class TestDygraphCheckpoint(unittest.TestCase): - def reader_decorator(self, reader): - def _reader_imple(): - for item in reader(): - image = np.array(item[0]).reshape(1, 28, 28) - label = np.array(item[1]).astype('int64').reshape(1) - yield image, label - - return _reader_imple - - def test_save_load_persistables(self): - seed = 90 - epoch_num = 1 - batch_size = 128 - - with fluid.dygraph.guard(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - - mnist = MNIST("mnist") - sgd = SGDOptimizer(learning_rate=1e-3) - - batch_py_reader = fluid.io.PyReader(capacity=1) - batch_py_reader.decorate_sample_list_generator( - paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=batch_size, - drop_last=True), - places=fluid.CPUPlace()) - - dy_param_init_value = {} - - for epoch in range(epoch_num): - for batch_id, data in enumerate(batch_py_reader()): - img = data[0] - label = data[1] - label.stop_gradient = True - - cost = mnist(img) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - dy_out = avg_loss.numpy() - - avg_loss.backward() - sgd.minimize(avg_loss) - fluid.dygraph.save_persistables(mnist.state_dict(), - "save_dir") - mnist.clear_gradients() - - for param in mnist.parameters(): - dy_param_init_value[param.name] = param.numpy() - - restore, _ = fluid.dygraph.load_persistables("save_dir") - - self.assertRaises(IOError, fluid.dygraph.load_persistables, - "not_exist_dir") - - mnist.load_dict(restore) - - self.assertEqual(len(dy_param_init_value), len(restore)) - for ky, value in restore.items(): - self.assertTrue( - 
np.allclose(value.numpy(), dy_param_init_value[ - value.name])) - self.assertTrue(np.isfinite(value.numpy().all())) - self.assertFalse(np.isnan(value.numpy().any())) - - if batch_id > 10: - break - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py new file mode 100644 index 00000000000..56fa716110c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -0,0 +1,903 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.dygraph.nn import Embedding, FC +import paddle.fluid.framework as framework +from paddle.fluid.optimizer import Adam +from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay +from test_imperative_base import new_program_scope +import numpy as np +import six + + +class SimpleLSTMRNN(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + num_steps, + num_layers=2, + init_scale=0.1, + dropout=None): + super(SimpleLSTMRNN, self).__init__(name_scope) + self._hidden_size = hidden_size + self._num_layers = num_layers + self._init_scale = init_scale + self._dropout = dropout + self._input = None + self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] + + def _build_once(self, input_embedding, init_hidden=None, init_cell=None): + self.weight_1_arr = [] + self.weight_2_arr = [] + self.bias_arr = [] + self.mask_array = [] + + for i in range(self._num_layers): + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 2, self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)) + self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.Constant(0.0)) + self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) + + def forward(self, input_embedding, init_hidden=None, init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): + pre_hidden = fluid.layers.slice( + init_hidden, axes=[0], starts=[i], ends=[i + 1]) + pre_cell = fluid.layers.slice( + init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = fluid.layers.reshape( + pre_hidden, shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape( + pre_cell, shape=[-1, self._hidden_size]) + self.hidden_array.append(pre_hidden) + 
self.cell_array.append(pre_cell) + + res = [] + for index in range(self._num_steps): + self._input = fluid.layers.slice( + input_embedding, axes=[1], starts=[index], ends=[index + 1]) + self._input = fluid.layers.reshape( + self._input, shape=[-1, self._hidden_size]) + for k in range(self._num_layers): + pre_hidden = self.hidden_array[k] + pre_cell = self.cell_array[k] + weight_1 = self.weight_1_arr[k] + bias = self.bias_arr[k] + + nn = fluid.layers.concat([self._input, pre_hidden], 1) + gate_input = fluid.layers.matmul(x=nn, y=weight_1) + + gate_input = fluid.layers.elementwise_add(gate_input, bias) + i, j, f, o = fluid.layers.split( + gate_input, num_or_sections=4, dim=-1) + c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( + i) * fluid.layers.tanh(j) + m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) + self.hidden_array[k] = m + self.cell_array[k] = c + self._input = m + + if self._dropout is not None and self._dropout > 0.0: + self._input = fluid.layers.dropout( + self._input, + dropout_prob=self._dropout, + dropout_implementation='upscale_in_train') + res.append( + fluid.layers.reshape( + self._input, shape=[1, -1, self._hidden_size])) + real_res = fluid.layers.concat(res, 0) + real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + last_hidden = fluid.layers.concat(self.hidden_array, 1) + last_hidden = fluid.layers.reshape( + last_hidden, shape=[-1, self._num_layers, self._hidden_size]) + last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_cell = fluid.layers.concat(self.cell_array, 1) + last_cell = fluid.layers.reshape( + last_cell, shape=[-1, self._num_layers, self._hidden_size]) + last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + return real_res, last_hidden, last_cell + + +class PtbModel(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + vocab_size, + num_layers=2, + num_steps=20, + init_scale=0.1, + dropout=None): + super(PtbModel, self).__init__(name_scope) + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.init_scale = init_scale + self.num_layers = num_layers + self.num_steps = num_steps + self.dropout = dropout + self.simple_lstm_rnn = SimpleLSTMRNN( + self.full_name(), + hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) + self.embedding = Embedding( + self.full_name(), + size=[vocab_size, hidden_size], + dtype='float32', + is_sparse=False, + param_attr=fluid.ParamAttr( + name='embedding_para', + initializer=fluid.initializer.UniformInitializer( + low=-init_scale, high=init_scale))) + + self.out_project = FC(self.full_name(), + self.vocab_size, + num_flatten_dims=2) + + def forward(self, input, label, init_hidden, init_cell): + init_h = fluid.layers.reshape( + init_hidden, shape=[self.num_layers, -1, self.hidden_size]) + + init_c = fluid.layers.reshape( + init_cell, shape=[self.num_layers, -1, self.hidden_size]) + + x_emb = self.embedding(input) + x_emb = fluid.layers.reshape( + x_emb, shape=[-1, self.num_steps, self.hidden_size]) + if self.dropout is not None and self.dropout > 0.0: + x_emb = fluid.layers.dropout( + x_emb, + dropout_prob=self.drop_out, + dropout_implementation='upscale_in_train') + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, + init_c) + rnn_out = fluid.layers.reshape( + rnn_out, shape=[-1, self.num_steps, self.hidden_size]) + + projection = self.out_project(rnn_out) + projection = fluid.layers.reshape( + projection, shape=[-1, self.vocab_size]) + loss = 
fluid.layers.softmax_with_cross_entropy( + logits=projection, label=label, soft_label=False) + loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) + loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = fluid.layers.reduce_sum(loss) + loss.permissions = True + + return loss, last_hidden, last_cell + + +class TestDygraphPtbRnn(unittest.TestCase): + def setUp(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr)) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + self.opti_dict = adam.state_dict() + self.base_opti = {} + for k, v in self.opti_dict.items(): + self.base_opti[v.name] = v.numpy() + self.assertTrue(np.sum(np.abs(v.numpy())) != 0) + + fluid.save_dygraph(self.opti_dict, "./test_dy") + + self.state_dict = ptb_model.state_dict() + self.model_base = {} + for k, v in self.state_dict.items(): + np_t = v.numpy() + self.model_base[v.name] = np_t + + fluid.save_dygraph(self.state_dict, "./test_dy") + + def testLoadAndSetVarBase(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr)) + dy_param_updated = dict() + dy_param_init 
= dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + # set to zero + for k, v in opti_dict.items(): + np_t = v.numpy() + var = v._ivar.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + para_state_dict, opti_state_dict = fluid.load_dygraph("./test_dy") + adam.set_dict(opti_state_dict) + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name])) + + # check parameter + state_dict = ptb_model.state_dict() + for k, v in state_dict.items(): + np_t = v.numpy() + var = v._ivar.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(para_state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetVariable(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr)) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for 
param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + # set to zero + for k, v in opti_dict.items(): + np_t = v.numpy() + var = v._ivar.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + adam.set_dict(self.opti_dict) + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name])) + + # check parameter + state_dict = ptb_model.state_dict() + for k, v in state_dict.items(): + np_t = v.numpy() + var = v._ivar.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(self.state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetNumpy(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr)) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + np_opti_dict = {} + # set to zero + for k, v in opti_dict.items(): + np_t = v.numpy() + np_opti_dict[v.name] = np_t + var = v._ivar.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + adam.set_dict(np_opti_dict) + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + self.assertTrue( + 
np.array_equal(v.numpy(), self.base_opti[v.name])) + + # check parameter + state_dict = ptb_model.state_dict() + np_state_dict = {} + for k, v in state_dict.items(): + np_t = v.numpy() + np_state_dict[v.name] = np_t + var = v._ivar.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(np_state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetVariableBeforeTrain(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + #set lr to 0.0, not udpate parameter + new_lr = 0.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + beta1=0.8, + beta2=0.6) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + adam.set_dict(self.opti_dict) + ptb_model.set_dict(self.state_dict) + + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + # check parameter + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + def testLoadAndSetVarBaseBeforeTrain(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * 
i) + # set lr to zero not update parameter + new_lr = 0.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + beta1=0.8, + beta2=0.6) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + state_dict, opti_dict = fluid.load_dygraph("./test_dy") + adam.set_dict(opti_dict) + ptb_model.set_dict(state_dict) + + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + # check parameter + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetNumpyBeforeTrain(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + # set lr to 0.0, not update parameter + new_lr = 0.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + beta1=0.8, + beta2=0.6) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + np_opti_dict = {} + np_state_dict = {} + + for k, v in self.opti_dict.items(): + np_opti_dict[v.name] = v.numpy() + + for k, v in self.state_dict.items(): + np_state_dict[v.name] = v.numpy() + + adam.set_dict(np_opti_dict) + ptb_model.set_dict(np_state_dict) + + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = 
to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + # check parameter + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[v.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + def testOnlyLoadParams(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding("emb", [10, 10]) + state_dict = emb.state_dict() + fluid.save_dygraph(state_dict, "emb_dy") + + para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy") + + self.assertTrue(opti_state_dict == None) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_optimizer.py deleted file mode 100644 index a65db433e66..00000000000 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_optimizer.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import unittest -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.optimizer import SGDOptimizer, Adam -from paddle.fluid.dygraph.nn import FC -from paddle.fluid.dygraph.base import to_variable - - -class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - - self._fc1 = FC(self.full_name(), 10) - self._fc2 = FC(self.full_name(), 10) - - def forward(self, inputs): - y = self._fc1(inputs) - y = self._fc2(y) - return y - - -class TestImperativeOptimizerBase(unittest.TestCase): - def setUp(self): - self.batch_num = 20 - - def get_optimizer(self): - raise NotImplementedError() - - def _check_mlp(self): - seed = 90 - with fluid.dygraph.guard(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - - mlp = MLP('mlp') - optimizer = self.get_optimizer() - optimizer2 = SGDOptimizer( - learning_rate=fluid.layers.natural_exp_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True)) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) - - for batch_id, data in enumerate(train_reader()): - dy_x_data = np.array( - [x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - 128, 1) - - img = to_variable(dy_x_data) - label = to_variable(y_data) - label._stop_gradient = True - - cost = mlp(img) - avg_loss = fluid.layers.reduce_mean(cost) - - avg_loss.backward() - optimizer.minimize(avg_loss) - optimizer2.minimize(avg_loss) - mlp.clear_gradients() - fluid.dygraph.save_persistables(mlp.state_dict(), "save_dir_2", - [optimizer, optimizer2]) - if batch_id == 2: - break - - with fluid.dygraph.guard(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - - mlp_load = MLP('mlp') - optimizer_load1 = self.get_optimizer() - optimizer_load2 = SGDOptimizer( - learning_rate=fluid.layers.natural_exp_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True)) - parameters, optimizers = fluid.dygraph.load_persistables( - "save_dir_2") - mlp_load.load_dict(parameters) - optimizer_load1.load(optimizers) - optimizer_load2.load(optimizers) - - self.assertTrue(optimizer._learning_rate.__dict__ == - optimizer_load1._learning_rate.__dict__) - self.assertTrue(optimizer2._learning_rate.__dict__ == - optimizer_load2._learning_rate.__dict__) - - -class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - bd = [3, 6, 9] - optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)])) - return optimizer - - def test_sgd(self): - self._check_mlp() - - -class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True)) - return optimizer - - def test_sgd(self): - self._check_mlp() - - -class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True)) - return optimizer - - def test_sgd(self): - self._check_mlp() - - -class 
TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True)) - return optimizer - - def test_adam(self): - self._check_mlp() - - -class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay( - learning_rate=0.1, decay_steps=5, cycle=self.cycle)) - return optimizer - - def test_sgd_cycle(self): - self.cycle = True - self._check_mlp() - - def test_sgd(self): - self.cycle = False - self._check_mlp() - - -class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay( - learning_rate=0.1, step_each_epoch=10000, epochs=120)) - return optimizer - - def test_sgd(self): - self._check_mlp() - - -class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): - def get_optimizer(self): - optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay( - d_model=512, warmup_steps=8000)) - return optimizer - - def test_sgd(self): - self._check_mlp() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 8497be489e5..168d4ddd896 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -180,7 +180,7 @@ class TestLayer(LayerTest): self.assertFalse(np.array_equal(out1.numpy(), out2.numpy())) mismatched_weight = np.random.randn(4, 4).astype("float32") - with self.assertRaises(ValueError): + with self.assertRaises(AssertionError): fc2.weight.set_value(mismatched_weight) fc2.weight.set_value(fc1_weight_init) fc2.bias.set_value(fc1_bias_init) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py new file mode 100644 index 00000000000..5d54c9ebc81 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -0,0 +1,431 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.dygraph.nn import Embedding +import paddle.fluid.framework as framework +from paddle.fluid.optimizer import Adam +from paddle.fluid.dygraph.base import to_variable +from test_imperative_base import new_program_scope +import numpy as np +import six + + +class SimpleLSTMRNN(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + num_steps, + num_layers=2, + init_scale=0.1, + dropout=None): + super(SimpleLSTMRNN, self).__init__(name_scope) + self._hidden_size = hidden_size + self._num_layers = num_layers + self._init_scale = init_scale + self._dropout = dropout + self._input = None + self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] + + def _build_once(self, input_embedding, init_hidden=None, init_cell=None): + self.weight_1_arr = [] + self.weight_2_arr = [] + self.bias_arr = [] + self.mask_array = [] + + for i in range(self._num_layers): + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 2, self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)) + self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.Constant(0.0)) + self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) + + def forward(self, input_embedding, init_hidden=None, init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): + pre_hidden = fluid.layers.slice( + init_hidden, axes=[0], starts=[i], ends=[i + 1]) + pre_cell = fluid.layers.slice( + init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = fluid.layers.reshape( + pre_hidden, shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape( + pre_cell, shape=[-1, self._hidden_size]) + self.hidden_array.append(pre_hidden) + self.cell_array.append(pre_cell) + + res = [] + for index in range(self._num_steps): + self._input = fluid.layers.slice( + input_embedding, axes=[1], starts=[index], ends=[index + 1]) + self._input = fluid.layers.reshape( + self._input, shape=[-1, self._hidden_size]) + for k in range(self._num_layers): + pre_hidden = self.hidden_array[k] + pre_cell = self.cell_array[k] + weight_1 = self.weight_1_arr[k] + bias = self.bias_arr[k] + + nn = fluid.layers.concat([self._input, pre_hidden], 1) + gate_input = fluid.layers.matmul(x=nn, y=weight_1) + + gate_input = fluid.layers.elementwise_add(gate_input, bias) + i, j, f, o = fluid.layers.split( + gate_input, num_or_sections=4, dim=-1) + c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( + i) * fluid.layers.tanh(j) + m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) + self.hidden_array[k] = m + self.cell_array[k] = c + self._input = m + + if self._dropout is not None and self._dropout > 0.0: + self._input = fluid.layers.dropout( + self._input, + dropout_prob=self._dropout, + dropout_implementation='upscale_in_train') + res.append( + fluid.layers.reshape( + self._input, shape=[1, -1, self._hidden_size])) + real_res = fluid.layers.concat(res, 0) + real_res = 
+        last_hidden = fluid.layers.concat(self.hidden_array, 1)
+        last_hidden = fluid.layers.reshape(
+            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
+        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
+        last_cell = fluid.layers.concat(self.cell_array, 1)
+        last_cell = fluid.layers.reshape(
+            last_cell, shape=[-1, self._num_layers, self._hidden_size])
+        last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
+        return real_res, last_hidden, last_cell
+
+
+class PtbModel(fluid.Layer):
+    def __init__(self,
+                 name_scope,
+                 hidden_size,
+                 vocab_size,
+                 num_layers=2,
+                 num_steps=20,
+                 init_scale=0.1,
+                 dropout=None):
+        super(PtbModel, self).__init__(name_scope)
+        self.hidden_size = hidden_size
+        self.vocab_size = vocab_size
+        self.init_scale = init_scale
+        self.num_layers = num_layers
+        self.num_steps = num_steps
+        self.dropout = dropout
+        self.simple_lstm_rnn = SimpleLSTMRNN(
+            self.full_name(),
+            hidden_size,
+            num_steps,
+            num_layers=num_layers,
+            init_scale=init_scale,
+            dropout=dropout)
+        self.embedding = Embedding(
+            self.full_name(),
+            size=[vocab_size, hidden_size],
+            dtype='float32',
+            is_sparse=False,
+            param_attr=fluid.ParamAttr(
+                name='embedding_para',
+                initializer=fluid.initializer.UniformInitializer(
+                    low=-init_scale, high=init_scale)))
+        self.softmax_weight = self.create_parameter(
+            attr=fluid.ParamAttr(),
+            shape=[self.hidden_size, self.vocab_size],
+            dtype="float32",
+            default_initializer=fluid.initializer.UniformInitializer(
+                low=-self.init_scale, high=self.init_scale))
+        self.softmax_bias = self.create_parameter(
+            attr=fluid.ParamAttr(),
+            shape=[self.vocab_size],
+            dtype="float32",
+            default_initializer=fluid.initializer.UniformInitializer(
+                low=-self.init_scale, high=self.init_scale))
+
+    def forward(self, input, label, init_hidden, init_cell):
+        init_h = fluid.layers.reshape(
+            init_hidden, shape=[self.num_layers, -1, self.hidden_size])
+
+        init_c = fluid.layers.reshape(
+            init_cell, shape=[self.num_layers, -1, self.hidden_size])
+
+        x_emb = self.embedding(input)
+        x_emb = fluid.layers.reshape(
+            x_emb, shape=[-1, self.num_steps, self.hidden_size])
+        if self.dropout is not None and self.dropout > 0.0:
+            x_emb = fluid.layers.dropout(
+                x_emb,
+                dropout_prob=self.dropout,
+                dropout_implementation='upscale_in_train')
+        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
+                                                               init_c)
+
+        rnn_out = fluid.layers.reshape(
+            rnn_out, shape=[-1, self.num_steps, self.hidden_size])
+        projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
+        projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
+        projection = fluid.layers.reshape(
+            projection, shape=[-1, self.vocab_size])
+        loss = fluid.layers.softmax_with_cross_entropy(
+            logits=projection, label=label, soft_label=False)
+        loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
+        loss = fluid.layers.reduce_mean(loss, dim=[0])
+        loss = fluid.layers.reduce_sum(loss)
+        loss.persistable = True
+
+        return loss, last_hidden, last_cell
+
+
+class TestDygraphPtbRnn(unittest.TestCase):
+    def test_ptb_rnn_cpu_float32(self):
+        seed = 90
+        hidden_size = 10
+        vocab_size = 1000
+        num_layers = 1
+        num_steps = 3
+        init_scale = 0.1
+        batch_size = 4
+        batch_num = 200
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            ptb_model = PtbModel(
+                "ptb_model",
+                hidden_size=hidden_size,
+                vocab_size=vocab_size,
+                num_layers=num_layers,
+                num_steps=num_steps,
+                init_scale=init_scale)
+
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            adam = Adam(learning_rate=1e-3)
+            x = fluid.layers.data(
+                name="x", shape=[-1, num_steps, 1], dtype='int64')
+            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='int64')
+            init_hidden = fluid.layers.data(
+                name="init_hidden", shape=[1], dtype='float32')
+            init_cell = fluid.layers.data(
+                name="init_cell", shape=[1], dtype='float32')
+
+            static_loss, static_last_hidden, static_last_cell = ptb_model(
+                x, y, init_hidden, init_cell)
+            adam.minimize(static_loss)
+            static_param_updated = dict()
+            static_param_init = dict()
+
+            out = exe.run(framework.default_startup_program())
+
+            static_loss_value = None
+            static_last_cell_value = None
+            static_last_hidden_value = None
+            for i in range(batch_num):
+                x_data = np.arange(12).reshape(4, 3).astype('int64')
+                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
+                x_data = x_data.reshape((-1, num_steps, 1))
+                y_data = y_data.reshape((-1, 1))
+                init_hidden_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                init_cell_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                fetch_list = [static_loss, static_last_hidden, static_last_cell]
+                out = exe.run(fluid.default_main_program(),
+                              feed={
+                                  "x": x_data,
+                                  "y": y_data,
+                                  "init_hidden": init_hidden_data,
+                                  "init_cell": init_cell_data
+                              },
+                              fetch_list=fetch_list)
+                static_loss_value = out[0]
+                static_last_hidden_value = out[1]
+                static_last_cell_value = out[2]
+
+            # get value before save
+            main_program = framework.default_main_program()
+            base_map = {}
+            for var in main_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                    # make sure every parameter and optimizer variable has been updated
+                    self.assertTrue(np.sum(np.abs(t)) != 0)
+                    base_map[var.name] = t
+
+            fluid.save(main_program, "./test_1")
+
+            # set var to zero
+            for var in main_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    ten = fluid.global_scope().find_var(var.name).get_tensor()
+                    ten.set(np.zeros_like(np.array(ten)), place)
+
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    # make sure every parameter and optimizer variable has been set to zero
+                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+            fluid.load(main_program, "./test_1")
+
+            for var in main_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    base_t = base_map[var.name]
+                    self.assertTrue(np.array_equal(new_t, base_t))
+
+
+class TestDygraphPtbRnnPartial(unittest.TestCase):
+    def test_ptb_rnn_cpu_float32(self):
+        seed = 90
+        hidden_size = 10
+        vocab_size = 1000
+        num_layers = 1
+        num_steps = 3
+        init_scale = 0.1
+        batch_size = 4
+        batch_num = 200
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            ptb_model = PtbModel(
+                "ptb_model",
+                hidden_size=hidden_size,
+                vocab_size=vocab_size,
+                num_layers=num_layers,
+                num_steps=num_steps,
+                init_scale=init_scale)
+
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            adam = Adam(learning_rate=1e-3)
+            x = fluid.layers.data(
+                name="x", shape=[-1, num_steps, 1], dtype='int64')
+            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='int64')
+            init_hidden = fluid.layers.data(
+                name="init_hidden", shape=[1], dtype='float32')
+            init_cell = fluid.layers.data(
+                name="init_cell", shape=[1], dtype='float32')
+
+            static_loss, static_last_hidden, static_last_cell = ptb_model(
+                x, y, init_hidden, init_cell)
+
+            test_program = fluid.default_main_program().clone(for_test=True)
+
+            add_1 = fluid.layers.fc(static_last_hidden,
+                                    size=hidden_size,
+                                    num_flatten_dims=2,
+                                    bias_attr=False)
+
+            adam.minimize(static_loss)
+            static_param_updated = dict()
+            static_param_init = dict()
+
+            out = exe.run(framework.default_startup_program())
+
+            static_loss_value = None
+            static_last_cell_value = None
+            static_last_hidden_value = None
+            for i in range(batch_num):
+                x_data = np.arange(12).reshape(4, 3).astype('int64')
+                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
+                x_data = x_data.reshape((-1, num_steps, 1))
+                y_data = y_data.reshape((-1, 1))
+                init_hidden_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                init_cell_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                fetch_list = [static_loss, static_last_hidden, static_last_cell]
+                out = exe.run(fluid.default_main_program(),
+                              feed={
+                                  "x": x_data,
+                                  "y": y_data,
+                                  "init_hidden": init_hidden_data,
+                                  "init_cell": init_cell_data
+                              },
+                              fetch_list=fetch_list)
+                static_loss_value = out[0]
+                static_last_hidden_value = out[1]
+                static_last_cell_value = out[2]
+
+            # get value before save
+            main_program = framework.default_main_program()
+            base_map = {}
+            for var in main_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                    # make sure every parameter and optimizer variable has been updated
+                    self.assertTrue(np.sum(np.abs(t)) != 0)
+                    base_map[var.name] = t
+
+            fluid.save(main_program, "./test_1")
+
+            # set var to zero
+            for var in main_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    ten = fluid.global_scope().find_var(var.name).get_tensor()
+                    ten.set(np.zeros_like(np.array(ten)), place)
+
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    # make sure every parameter and optimizer variable has been set to zero
+                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+            fluid.load(test_program, "./test_1")
+
+            for var in test_program.list_vars():
+                if isinstance(var,
+                              framework.Parameter) or var.belong_to_optimizer:
+                    print(var.name)
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    base_t = base_map[var.name]
+                    self.assertTrue(np.array_equal(new_t, base_t))
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
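
For reference, below is a minimal, self-contained sketch (not part of the patch) of the static-graph checkpoint cycle that the tests above drive end to end. The toy fc network and the "./my_ckpt" path prefix are illustrative assumptions; the `fluid.save(program, model_path)` and `fluid.load(program, model_path)` entry points and their signatures are the ones this PR introduces.

import numpy as np
import paddle.fluid as fluid

main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # a toy network: one fc layer trained with Adam
    x = fluid.layers.data(name='x', shape=[-1, 10], dtype='float32')
    hidden = fluid.layers.fc(x, size=4)
    loss = fluid.layers.reduce_mean(hidden)
    fluid.optimizer.Adam(learning_rate=1e-3).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
exe.run(main_prog,
        feed={'x': np.random.rand(8, 10).astype('float32')},
        fetch_list=[loss])

# persist every parameter and optimizer variable reachable from main_prog
fluid.save(main_prog, "./my_ckpt")

# later, after rebuilding the same program (e.g. in a fresh process),
# restore parameters and optimizer state from the checkpoint
fluid.load(main_prog, "./my_ckpt")

As the TestDygraphPtbRnnPartial case above checks, `fluid.load` also accepts a pruned program such as one obtained via `clone(for_test=True)` before the optimizer ops were appended; in that case only the variables still present in that program are restored from the checkpoint.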