diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc
index 8a28292fb7cf4cde4411c77b25dc80c8d3d4a268..261f6e807a22d328a20156bed8ee9974637898c3 100644
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
@@ -58,7 +58,6 @@ namespace framework {
 std::once_flag gflags_init_flag;
 std::once_flag glog_init_flag;
 std::once_flag p2p_init_flag;
-std::once_flag glog_warning_once_flag;
 
 bool InitGflags(std::vector<std::string> args) {
   bool successed = false;
@@ -260,22 +259,22 @@ const char *ParseSignalErrorString(const std::string &str) {
 }
 
 // Handle SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
-std::ostringstream signal_msg_dumper;
 void SignalHandle(const char *data, int size) {
   try {
     // NOTE1: The glog FailureSignalHandler dumped messages
     // are deal with line by line
+    auto signal_msg_dumper_ptr = SignalMessageDumper::Instance().Get();
     // NOTE2: we only deal with the time info ane signal info,
     // the stack trace will generated by paddle self
     if (StartsWith(data, "*** Aborted at")) {
-      signal_msg_dumper << " [TimeInfo: " << std::string(data, size - 1)
-                        << "]\n";
+      *signal_msg_dumper_ptr << " [TimeInfo: " << std::string(data, size - 1)
+                             << "]\n";
     } else if (StartsWith(data, "***")) {
       std::string signal_info(data, size - 1);
       std::string useless_substr("; stack trace:");
       size_t start_pos = signal_info.rfind(useless_substr);
       signal_info.replace(start_pos, useless_substr.length(), "");
-      signal_msg_dumper << " [SignalInfo: " << signal_info << "]\n";
+      *signal_msg_dumper_ptr << " [SignalInfo: " << signal_info << "]\n";
       // NOTE3: Here does not throw an exception,
       // otherwise it will casue "terminate called recursively"
       auto exp = platform::EnforceNotMet(
@@ -283,7 +282,7 @@ void SignalHandle(const char *data, int size) {
               "A serious error (%s) is detected by the operating system.",
               ParseSignalErrorString(signal_info)),
           __FILE__, __LINE__);
-      std::cout << exp.what() << signal_msg_dumper.str() << std::endl;
+      std::cout << exp.what() << (*signal_msg_dumper_ptr).str() << std::endl;
     }
   } catch (...) {
     // Since the program has already triggered a system error,
diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h
index 908a5943d4b6df2c11d020bb489fa6023107f1e1..5bd5a640ade351fc66b01e89cf670ed8b0fd3b05 100644
--- a/paddle/fluid/platform/init.h
+++ b/paddle/fluid/platform/init.h
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <memory>
 #include <mutex>  // NOLINT
 #include <string>
 #include <vector>
@@ -22,7 +23,7 @@ limitations under the License. */
 
 namespace paddle {
 namespace platform {
-void ParseCommandLineFlags(int argc, char **argv, bool remove);
+void ParseCommandLineFlags(int argc, char** argv, bool remove);
 }  // namespace platform
 }  // namespace paddle
 
@@ -32,14 +33,32 @@ namespace framework {
 
 bool InitGflags(std::vector<std::string> argv);
 
-void InitGLOG(const std::string &prog_name);
+void InitGLOG(const std::string& prog_name);
 
 void InitDevices(bool init_p2p);
 
 void InitDevices(bool init_p2p, const std::vector<int> devices);
 
 #ifndef _WIN32
-void SignalHandle(const char *data, int size);
+class SignalMessageDumper {
+ public:
+  ~SignalMessageDumper() {}
+  SignalMessageDumper(const SignalMessageDumper& o) = delete;
+  const SignalMessageDumper& operator=(const SignalMessageDumper& o) = delete;
+
+  static SignalMessageDumper& Instance() {
+    static SignalMessageDumper instance;
+    return instance;
+  }
+
+  std::shared_ptr<std::ostringstream> Get() { return dumper_; }
+
+ private:
+  SignalMessageDumper() : dumper_(new std::ostringstream()) {}
+  std::shared_ptr<std::ostringstream> dumper_;
+};
+
+void SignalHandle(const char* data, int size);
 #endif
 
 }  // namespace framework
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
index 0a9e66a5bb0b1b7f15928891f8eefcbc67ebffb5..05fce7bf837664eafd89319eb6cdd973b745605f 100644
--- a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
+++ b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
@@ -112,14 +112,14 @@ class PartialProgramLayer(layers.Layer):
         self._outputs = NestSequence(outputs, need_check=True)
         self._params = parameters if parameters is not None else []
 
-        self._infer_program = self._verify_program(main_program)
-        self._train_program = self._append_backward_desc()
-        # Switch infer or train by train() and eval()
-        self._trace_program = None
+        main_program = self._verify_program(main_program)
+        self._infer_program = self._clone_for_test(main_program)
+        self._train_program = self._append_backward_desc(main_program)
+
         self._set_grad_type(self._params)
         self._inner_scope = core.Scope()
         # Set default mode to train
-        self.train()
+        self.training = True
 
     def _verify_program(self, main_program):
         """
@@ -136,8 +136,8 @@ class PartialProgramLayer(layers.Layer):
         return main_program
 
     @switch_to_static_graph
-    def _append_backward_desc(self):
-        program = self._infer_program.clone()
+    def _append_backward_desc(self, main_program):
+        program = main_program.clone()
         targets = []
         for out in self._outputs.tolist():
             if isinstance(out, framework.Variable):
@@ -165,15 +165,6 @@ class PartialProgramLayer(layers.Layer):
 
         self._params = required_params
 
-    def train(self):
-        # self.training is inherited from layers.Layer
-        self.training = True
-        self._trace_program = self._train_program
-
-    def eval(self):
-        self.training = False
-        self._trace_program = self._infer_program
-
     def forward(self, inputs):
         in_vars, out_vars, tmp_scope_vec = self._prepare(inputs)
 
@@ -186,7 +177,7 @@ class PartialProgramLayer(layers.Layer):
             outputs={'Out': valid_vars(out_vars),
                      'OutScope': tmp_scope_vec},
             attrs={
-                'global_block': self._trace_program.desc.block(0),
+                'global_block': self.program.desc.block(0),
                 'start_op_index': 0,
                 'end_op_index': self._infer_program.desc.block(0).op_size(),
                 'is_test': not self.training
@@ -195,6 +186,10 @@ class PartialProgramLayer(layers.Layer):
         restored_nest_out = self._restore_out(out_vars)
         return self._remove_no_value(restored_nest_out)
 
+    @property
+    def program(self):
+        return self._train_program if self.training else self._infer_program
+
     def _prepare(self, inputs):
         """
         Prepare inputs, outputs, attrs.
@@ -253,6 +248,10 @@ class PartialProgramLayer(layers.Layer):
 
         return outs
 
+    @switch_to_static_graph
+    def _clone_for_test(self, main_program):
+        return main_program.clone(for_test=True)
+
     def _is_no_value(self, var):
         if isinstance(var, core.VarBase):
             if var.shape == [1] and var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
index 64fbb51f9a5f7a2937b5f7791cf0a004517bceab..6272f7369ec6db0cf7b3e5d82f689ddabf3e19ab 100644
--- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
+++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
@@ -487,6 +487,8 @@ class ProgramTranslator(object):
         _, partial_program_layer = self._program_cache[function_spec]
 
         if args and isinstance(args[0], layers.Layer):
+            # Synchronize self.training attribute.
+            partial_program_layer.training = args[0].training
             args = args[1:]
 
         return partial_program_layer(args)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
index 2f67710649b05bc0dea38f126bfc87ef473c7ffe..3da60e955deee9b6d4c74ba5ff1a550ae135afdb 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py
@@ -16,7 +16,9 @@ from __future__ import print_function
 import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
-from paddle.fluid.dygraph import declarative
+from paddle.fluid.dygraph import declarative, ProgramTranslator
+
+from test_fetch_feed import Linear
 
 import unittest
 
@@ -121,5 +123,33 @@ class TestWithNestedOutput(unittest.TestCase):
         self.assertTrue(dy_var, st_var)
 
 
+class TestWithTrainAndEval(unittest.TestCase):
+    def test_switch_eval_and_train(self):
+        program_translator = ProgramTranslator()
+
+        with fluid.dygraph.guard():
+            linear_net = Linear()
+            x_data = np.random.random((4, 10)).astype('float32')
+            x = fluid.dygraph.to_variable(x_data)
+            linear_net(x)
+
+            _, partial_layer = program_translator.get_program_cache().last()[-1]
+            # check default mode is for training
+            self.assertEqual(partial_layer.program,
+                             partial_layer._train_program)
+
+            # switch to run test program after `eval()`
+            linear_net.eval()
+            linear_net(x)
+            self.assertEqual(partial_layer.program,
+                             partial_layer._infer_program)
+
+            # switch back into training
+            linear_net.train()
+            linear_net(x)
+            self.assertEqual(partial_layer.program,
+                             partial_layer._train_program)
+
+
 if __name__ == '__main__':
     unittest.main()
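
Note on the behavioral core of this patch: `PartialProgramLayer` no longer overrides `train()`/`eval()`. Instead, the new `program` property picks between `_train_program` and `_infer_program` from the inherited `training` flag, which `ProgramTranslator` copies from the user's `Layer` right before each call. A minimal plain-Python sketch of that selection pattern follows; the `ProgramSwitchSketch` class and its string "programs" are hypothetical stand-ins for illustration, not Paddle API.

class ProgramSwitchSketch(object):
    """Simplified stand-in for PartialProgramLayer's mode switching."""

    def __init__(self, train_program, infer_program):
        self._train_program = train_program
        self._infer_program = infer_program
        # Default mode is training, mirroring `self.training = True`.
        self.training = True

    @property
    def program(self):
        # Same selection rule as the new PartialProgramLayer.program property.
        return self._train_program if self.training else self._infer_program


layer = ProgramSwitchSketch(train_program='train_desc', infer_program='infer_desc')
assert layer.program == 'train_desc'    # default mode is train

layer.training = False                  # what ProgramTranslator does after net.eval()
assert layer.program == 'infer_desc'

layer.training = True                   # and after net.train()
assert layer.program == 'train_desc'

Because the choice is made on every access rather than cached in a `_trace_program` field, the selected program always tracks the current `training` flag, which is what the `program_translator.py` change relies on when it syncs `partial_program_layer.training = args[0].training`.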