Commit 1cd61122 authored by zlsh80826

Merge branch 'fix_stack_op_conflict' into trt_stack_opi, test=develop

@@ -58,7 +58,6 @@ namespace framework {
std::once_flag gflags_init_flag;
std::once_flag glog_init_flag;
std::once_flag p2p_init_flag;
std::once_flag glog_warning_once_flag;
bool InitGflags(std::vector<std::string> args) {
bool successed = false;
@@ -260,22 +259,22 @@ const char *ParseSignalErrorString(const std::string &str) {
}
// Handle SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
std::ostringstream signal_msg_dumper;
void SignalHandle(const char *data, int size) {
try {
// NOTE1: The messages dumped by the glog FailureSignalHandler
// are dealt with line by line
auto signal_msg_dunmer_ptr = SignalMessageDumper::Instance().Get();
// NOTE2: we only deal with the time info and signal info;
// the stack trace will be generated by paddle itself
if (StartsWith(data, "*** Aborted at")) {
signal_msg_dumper << " [TimeInfo: " << std::string(data, size - 1)
<< "]\n";
*signal_msg_dunmer_ptr << " [TimeInfo: " << std::string(data, size - 1)
<< "]\n";
} else if (StartsWith(data, "***")) {
std::string signal_info(data, size - 1);
std::string useless_substr("; stack trace:");
size_t start_pos = signal_info.rfind(useless_substr);
signal_info.replace(start_pos, useless_substr.length(), "");
signal_msg_dumper << " [SignalInfo: " << signal_info << "]\n";
*signal_msg_dunmer_ptr << " [SignalInfo: " << signal_info << "]\n";
// NOTE3: Do not throw an exception here,
// otherwise it will cause "terminate called recursively"
auto exp = platform::EnforceNotMet(
@@ -283,7 +282,7 @@ void SignalHandle(const char *data, int size) {
"A serious error (%s) is detected by the operating system.",
ParseSignalErrorString(signal_info)),
__FILE__, __LINE__);
std::cout << exp.what() << signal_msg_dumper.str() << std::endl;
std::cout << exp.what() << (*signal_msg_dunmer_ptr).str() << std::endl;
}
} catch (...) {
// Since the program has already triggered a system error,
......
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
@@ -22,7 +23,7 @@ limitations under the License. */
namespace paddle {
namespace platform {
void ParseCommandLineFlags(int argc, char **argv, bool remove);
void ParseCommandLineFlags(int argc, char** argv, bool remove);
} // namespace platform
} // namespace paddle
@@ -32,14 +33,32 @@ namespace framework {
bool InitGflags(std::vector<std::string> argv);
void InitGLOG(const std::string &prog_name);
void InitGLOG(const std::string& prog_name);
void InitDevices(bool init_p2p);
void InitDevices(bool init_p2p, const std::vector<int> devices);
#ifndef _WIN32
void SignalHandle(const char *data, int size);
class SignalMessageDumper {
public:
~SignalMessageDumper() {}
SignalMessageDumper(const SignalMessageDumper& o) = delete;
const SignalMessageDumper& operator=(const SignalMessageDumper& o) = delete;
static SignalMessageDumper& Instance() {
static SignalMessageDumper instance;
return instance;
}
std::shared_ptr<std::ostringstream> Get() { return dumper_; }
private:
SignalMessageDumper() : dumper_(new std::ostringstream()) {}
std::shared_ptr<std::ostringstream> dumper_;
};
void SignalHandle(const char* data, int size);
#endif
} // namespace framework
......
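The header hunk above replaces the file-scope `std::ostringstream signal_msg_dumper` with a `SignalMessageDumper` singleton (constructed on first use inside `Instance()`) whose `Get()` hands out a `std::shared_ptr<std::ostringstream>`, so `SignalHandle` appends to a lazily created, shared buffer rather than a mutable global. A minimal Python analogue of that accessor pattern, for illustration only (the actual change is the C++ class shown above):

import io


class MessageDumper:
    """Illustrative analogue of the C++ SignalMessageDumper: one lazily
    created buffer, handed out through a single Instance()-style accessor."""
    _instance = None

    def __init__(self):
        self._dumper = io.StringIO()

    @classmethod
    def instance(cls):
        # Mirrors the C++ function-local static: built once, on first use.
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def get(self):
        return self._dumper


# Usage sketch: a handler appends to the shared buffer, the error report reads it back.
MessageDumper.instance().get().write(" [SignalInfo: example]\n")
print(MessageDumper.instance().get().getvalue())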
@@ -112,14 +112,14 @@ class PartialProgramLayer(layers.Layer):
self._outputs = NestSequence(outputs, need_check=True)
self._params = parameters if parameters is not None else []
self._infer_program = self._verify_program(main_program)
self._train_program = self._append_backward_desc()
# Switch infer or train by train() and eval()
self._trace_program = None
main_program = self._verify_program(main_program)
self._infer_program = self._clone_for_test(main_program)
self._train_program = self._append_backward_desc(main_program)
self._set_grad_type(self._params)
self._inner_scope = core.Scope()
# Set default mode to train
self.train()
self.training = True
def _verify_program(self, main_program):
"""
@@ -136,8 +136,8 @@ class PartialProgramLayer(layers.Layer):
return main_program
@switch_to_static_graph
def _append_backward_desc(self):
program = self._infer_program.clone()
def _append_backward_desc(self, main_program):
program = main_program.clone()
targets = []
for out in self._outputs.tolist():
if isinstance(out, framework.Variable):
@@ -165,15 +165,6 @@ class PartialProgramLayer(layers.Layer):
self._params = required_params
def train(self):
# self.training is inherited from layers.Layer
self.training = True
self._trace_program = self._train_program
def eval(self):
self.training = False
self._trace_program = self._infer_program
def forward(self, inputs):
in_vars, out_vars, tmp_scope_vec = self._prepare(inputs)
@@ -186,7 +177,7 @@ class PartialProgramLayer(layers.Layer):
outputs={'Out': valid_vars(out_vars),
'OutScope': tmp_scope_vec},
attrs={
'global_block': self._trace_program.desc.block(0),
'global_block': self.program.desc.block(0),
'start_op_index': 0,
'end_op_index': self._infer_program.desc.block(0).op_size(),
'is_test': not self.training
@@ -195,6 +186,10 @@ class PartialProgramLayer(layers.Layer):
restored_nest_out = self._restore_out(out_vars)
return self._remove_no_value(restored_nest_out)
@property
def program(self):
return self._train_program if self.training else self._infer_program
def _prepare(self, inputs):
"""
Prepare inputs, outputs, attrs.
@@ -253,6 +248,10 @@ class PartialProgramLayer(layers.Layer):
return outs
@switch_to_static_graph
def _clone_for_test(self, main_program):
return main_program.clone(for_test=True)
def _is_no_value(self, var):
if isinstance(var, core.VarBase):
if var.shape == [1] and var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
......
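The partial_program.py hunks above remove the explicit train()/eval() overrides: PartialProgramLayer now keeps a test-mode clone built by `_clone_for_test(main_program)` (i.e. `main_program.clone(for_test=True)`), builds the backward program from the verified main program, and exposes whichever one is active through the `program` property driven by `self.training`. A minimal standalone sketch of that mode-switching pattern (illustration only, not the Paddle class itself):

class ModeSwitchingLayer:
    def __init__(self, train_program, infer_program):
        self._train_program = train_program
        self._infer_program = infer_program
        self.training = True  # default mode is train, as in PartialProgramLayer

    @property
    def program(self):
        # The property replaces the old train()/eval() overrides: callers only
        # flip `training`, and `program` resolves to the matching cached program.
        return self._train_program if self.training else self._infer_program


layer = ModeSwitchingLayer("train_desc", "infer_desc")
assert layer.program == "train_desc"   # default: training mode
layer.training = False                 # what Layer.eval() ultimately toggles
assert layer.program == "infer_desc"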
@@ -487,6 +487,8 @@ class ProgramTranslator(object):
_, partial_program_layer = self._program_cache[function_spec]
if args and isinstance(args[0], layers.Layer):
# Synchronize self.training attribute.
partial_program_layer.training = args[0].training
args = args[1:]
return partial_program_layer(args)
......
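The ProgramTranslator hunk above forwards the caller Layer's `training` flag to the cached PartialProgramLayer before running it, so the `program` property resolves to the right program without rebuilding the cache; the sync only happens when the first positional argument is a `layers.Layer`, since only then is there a mode to inherit. A hedged sketch of that synchronization step (simplified names, not the actual translator code):

def run_cached_program(partial_program_layer, layer, *args):
    # layer.train() / layer.eval() only toggle layer.training, so the flag has
    # to be copied onto the cached partial program before each call.
    partial_program_layer.training = layer.training
    return partial_program_layer(args)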
@@ -16,7 +16,9 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.fluid.dygraph import declarative
from paddle.fluid.dygraph import declarative, ProgramTranslator
from test_fetch_feed import Linear
import unittest
@@ -121,5 +123,33 @@ class TestWithNestedOutput(unittest.TestCase):
self.assertTrue(dy_var, st_var)
class TestWithTrainAndEval(unittest.TestCase):
def test_switch_eval_and_train(self):
program_translator = ProgramTranslator()
with fluid.dygraph.guard():
linear_net = Linear()
x_data = np.random.random((4, 10)).astype('float32')
x = fluid.dygraph.to_variable(x_data)
linear_net(x)
_, partial_layer = program_translator.get_program_cache().last()[-1]
# check that the default mode is training
self.assertEqual(partial_layer.program,
partial_layer._train_program)
# switch to run test program after `eval()`
linear_net.eval()
linear_net(x)
self.assertEqual(partial_layer.program,
partial_layer._infer_program)
# switch back into training
linear_net.train()
linear_net(x)
self.assertEqual(partial_layer.program,
partial_layer._train_program)
if __name__ == '__main__':
unittest.main()