From 3a76062c8463d58bfeef7e4d6a4f899f79341989 Mon Sep 17 00:00:00 2001
From: QI JUN
Date: Fri, 24 Nov 2017 13:18:13 +0800
Subject: [PATCH] support testing when training and handle dropout and
 batch_norm operator in testing mode (#5734)

* is_training to is_test in dropout op
* handle dropout and batch_norm operator when prune pdesc in testing mode
* handle dropout and batch_norm operator when prune pdesc in testing mode
* add get_inference_program method
* fix dropout op
* fix ci
* test data after each batch training
* refine code
* refine test_book3
* fix ci
* follow comments
---
 paddle/framework/executor.cc                  |  2 +-
 paddle/framework/prune.cc                     | 23 +++++++++++
 paddle/framework/prune.h                      |  2 +
 paddle/operators/dropout_op.cc                |  8 ++--
 paddle/operators/dropout_op.cu                |  2 +-
 paddle/operators/dropout_op.h                 |  6 +--
 paddle/pybind/pybind.cc                       |  5 +++
 python/paddle/v2/fluid/evaluator.py           |  3 ++
 python/paddle/v2/fluid/framework.py           |  7 ++++
 python/paddle/v2/fluid/io.py                  | 19 +++++++--
 .../book/test_image_classification_train.py   | 40 +++++++++++++++++--
 .../tests/book/test_recognize_digits_mlp.py   | 37 +++++++++++++++--
 .../paddle/v2/fluid/tests/test_dropout_op.py  | 10 ++---
 13 files changed, 141 insertions(+), 23 deletions(-)

diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index adedd8cb0e..2ffb5b7dbb 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -120,7 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
-    VLOG(10) << op->DebugString();
+    VLOG(3) << op->DebugString();
     op->Run(*local_scope, *device);
   }
   if (create_local_scope) {
diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc
index bf3066983c..da76052eb4 100644
--- a/paddle/framework/prune.cc
+++ b/paddle/framework/prune.cc
@@ -26,6 +26,8 @@ namespace framework {
 
 const std::string kFeedOpType = "feed";
 const std::string kFetchOpType = "fetch";
+const std::string kDropOutOpType = "dropout";
+const std::string kBatchNormOpType = "batch_norm";
 
 bool HasDependentVar(const OpDesc& op_desc,
                      const std::set<std::string>& dependent_vars) {
@@ -106,5 +108,26 @@ void Prune(const ProgramDesc& input, ProgramDesc* output) {
   prune_impl(input, output, 0);
 }
 
+void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output,
+                             int block_id) {
+  *output = input;
+  auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
+  for (auto& op_desc : *op_field) {
+    if (op_desc.type() == kDropOutOpType ||
+        op_desc.type() == kBatchNormOpType) {
+      for (auto& attr : *op_desc.mutable_attrs()) {
+        if (attr.name() == "is_test") {
+          attr.set_b(true);
+          break;
+        }
+      }
+    }
+  }
+}
+
+void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output) {
+  inference_optimize_impl(input, output, 0);
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/prune.h b/paddle/framework/prune.h
index 8cfb16343a..23db014894 100644
--- a/paddle/framework/prune.h
+++ b/paddle/framework/prune.h
@@ -22,5 +22,7 @@ namespace framework {
 
 void Prune(const ProgramDesc& input, ProgramDesc* output);
 
+void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output);
+
 }  // namespace framework
 }  // namespace paddle
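For readers following the C++ above: InferenceOptimize does no pruning of its own. It copies the whole ProgramDesc and rewrites only the is_test attribute of dropout and batch_norm ops in the given block, leaving the training program untouched. A rough Python equivalent over the framework.proto message, given as an illustrative sketch only (the field names blocks/ops/attrs/name/b mirror framework.proto but should be read as assumptions, not as part of the patch):

    # Sketch: the same rewrite as inference_optimize_impl, expressed over the
    # protobuf ProgramDesc message.
    TEST_MODE_OPS = {"dropout", "batch_norm"}

    def inference_optimize(input_desc, block_id=0):
        output_desc = type(input_desc)()
        output_desc.CopyFrom(input_desc)      # work on a copy: *output = input
        for op in output_desc.blocks[block_id].ops:
            if op.type in TEST_MODE_OPS:
                for attr in op.attrs:
                    if attr.name == "is_test":
                        attr.b = True         # switch the op into test mode
                        break
        return output_desc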
diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc
index 818146aca7..932c0bf8fb 100644
--- a/paddle/operators/dropout_op.cc
+++ b/paddle/operators/dropout_op.cc
@@ -30,7 +30,7 @@ class DropoutOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("X");
     ctx->SetOutputDim("Out", x_dims);
-    if (ctx->Attrs().Get<bool>("is_training") == true) {
+    if (ctx->Attrs().Get<bool>("is_test") == false) {
       ctx->SetOutputDim("Mask", x_dims);
     }
     ctx->ShareLoD("X", /*->*/ "Out");
@@ -49,7 +49,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
         .SetDefault(.5f);
-    AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
+    AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
     AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
 
     AddComment(R"DOC(
@@ -71,8 +71,8 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_training"), true,
-                      "GradOp is only callable when is_training is true");
+    PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_test"), false,
+                      "GradOp is only callable when is_test is false");
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null.");
diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu
index 30c769000f..db3578b9bf 100644
--- a/paddle/operators/dropout_op.cu
+++ b/paddle/operators/dropout_op.cu
@@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
     auto Y = EigenMatrix<T>::Reshape(*y, 1);
 
     auto place = context.GetEigenDevice<Place>();
-    if (context.Attr<bool>("is_training")) {
+    if (!context.Attr<bool>("is_test")) {
       auto* mask = context.Output<Tensor>("Mask");
       auto* mask_data = mask->mutable_data<T>(context.GetPlace());
       int size = framework::product(mask->dims());
diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h
index 6000b75fec..d9a130fdc0 100644
--- a/paddle/operators/dropout_op.h
+++ b/paddle/operators/dropout_op.h
@@ -35,7 +35,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
     auto* y_data = y->mutable_data<T>(context.GetPlace());
     float dropout_prob = context.Attr<float>("dropout_prob");
 
-    if (context.Attr<bool>("is_training")) {
+    if (!context.Attr<bool>("is_test")) {
       auto* mask = context.Output<Tensor>("Mask");
       auto* mask_data = mask->mutable_data<T>(context.GetPlace());
       int seed = context.Attr<int>("seed");
@@ -65,8 +65,8 @@ template <typename Place, typename T>
 class DropoutGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE(context.Attr<bool>("is_training"),
-                   "GradOp is only callable when is_training is true");
+    PADDLE_ENFORCE(!context.Attr<bool>("is_test"),
+                   "GradOp is only callable when is_test is false");
 
     auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
     auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
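Taken together, the kernel changes above make the forward path branch on is_test: in training mode a Bernoulli mask is drawn, applied to the input, and written to Mask for the gradient kernel; in test mode no mask is produced and the output is the input scaled by dropout_prob (the convention these kernels use, and exactly what the updated TestDropoutOp4/5 at the end of this patch assert). A NumPy restatement of that behavior, as an illustrative sketch only (the RNG stream will not match the seeded engine used in the C++ kernels):

    import numpy as np

    def dropout_forward(x, dropout_prob, is_test, seed=0):
        """Reference semantics of the dropout op after this change (sketch)."""
        if is_test:
            # Test mode: no Mask output; scale by dropout_prob as the kernels do.
            return x * dropout_prob, None
        rng = np.random.RandomState(seed)
        mask = (rng.uniform(size=x.shape) >= dropout_prob).astype(x.dtype)
        return x * mask, mask  # Mask is kept for DropoutGradKernel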
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 3d8d3f1d2f..e697739cc6 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -293,6 +293,11 @@ All parameter, weight, gradient are variables in Paddle.
     Prune(*prog_with_targets.Proto(), &pruned_desc);
     return new ProgramDescBind(pruned_desc);
   });
+  m.def("inference_optimize", [](ProgramDescBind &origin) {
+    ProgramDesc pruned_desc;
+    InferenceOptimize(*(origin.Proto()), &pruned_desc);
+    return new ProgramDescBind(pruned_desc);
+  });
   m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py
index 0057ed6216..f78d2f814c 100644
--- a/python/paddle/v2/fluid/evaluator.py
+++ b/python/paddle/v2/fluid/evaluator.py
@@ -33,6 +33,9 @@ class Evaluator(object):
         else:
             self._main_program = g_main_program
 
+    def states(self):
+        return self._states
+
     def _update_ops(self, *args, **kwargs):
         """
         append update ops to the global states
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index fb1c57d296..872c19c2f6 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -511,6 +511,13 @@ class Program(object):
         res.sync_with_cpp()
         return res
 
+    def inference_optimize(self):
+        res = Program()
+        res.desc = core.inference_optimize(self.desc)
+        res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())]
+        res.sync_with_cpp()
+        return res
+
     @staticmethod
     def parse_from_string(binary_str):
         p = Program()
diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py
index 6f55fe9e74..e5b2aa3b91 100644
--- a/python/paddle/v2/fluid/io.py
+++ b/python/paddle/v2/fluid/io.py
@@ -6,7 +6,8 @@ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \
 
 __all__ = [
     'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
-    'load_persistables', "save_inference_model", "load_inference_model"
+    'load_persistables', "save_inference_model", "load_inference_model",
+    "get_inference_program"
 ]
 
@@ -151,6 +152,17 @@ def load_persistables(executor, dirname, main_program=None):
         predicate=is_persistable)
 
 
+def get_inference_program(target_vars, main_program=None):
+    if main_program is None:
+        main_program = g_main_program
+    if not isinstance(target_vars, list):
+        target_vars = [target_vars]
+
+    pruned_program = main_program.prune(targets=target_vars)
+    inference_program = pruned_program.inference_optimize()
+    return inference_program
+
+
 def save_inference_model(dirname,
                          feeded_var_names,
                          target_vars,
@@ -177,13 +189,14 @@ def save_inference_model(dirname,
     if not os.path.isdir(dirname):
         os.makedirs(dirname)
 
-    pruned_program = main_program.prune(target_vars)
+    pruned_program = main_program.prune(targets=target_vars)
+    inference_program = pruned_program.inference_optimize()
     fetch_var_names = [v.name for v in target_vars]
 
     model_file_name = dirname + "/__model__"
     with open(model_file_name, "w") as f:
         pickle.dump({
-            "program_desc_str": pruned_program.desc.serialize_to_string(),
+            "program_desc_str": inference_program.desc.serialize_to_string(),
             "feed_var_names": feeded_var_names,
             "fetch_var_names": fetch_var_names
         }, f, -1)
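get_inference_program is the user-facing entry point for the pieces above: it prunes the main program down to the requested target variables and then runs inference_optimize on the result so dropout and batch_norm execute in test mode. When an evaluator is involved, its state variables have to be included in the targets, otherwise the ops that update the accuracy counters are pruned away; that is what the new Evaluator.states() accessor is for. A condensed usage sketch, distilled from the two book tests below (illustrative only; the avg_cost/predict/label arguments are whatever the caller's network defines):

    import paddle.v2.fluid.evaluator as evaluator
    from paddle.v2.fluid.io import get_inference_program

    def build_test_program(avg_cost, predict, label):
        """Build a test-mode program plus the evaluator that scores it (sketch)."""
        test_accuracy, test_acc_out = evaluator.accuracy(input=predict, label=label)
        # Keep the evaluator's state variables as prune targets so the counter
        # update ops survive pruning.
        test_target = [avg_cost, test_acc_out] + test_accuracy.states().values()
        return get_inference_program(test_target), test_accuracy, test_acc_out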
diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
index a3acab67ce..76cbd410f9 100644
--- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
+++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
@@ -5,6 +5,7 @@ import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
 import paddle.v2.fluid.evaluator as evaluator
+from paddle.v2.fluid.io import get_inference_program
 from paddle.v2.fluid.executor import Executor
 from paddle.v2.fluid.initializer import XavierInitializer
 from paddle.v2.fluid.optimizer import AdamOptimizer
@@ -116,9 +117,11 @@ PASS_NUM = 1
 
 train_reader = paddle.batch(
     paddle.reader.shuffle(
-        paddle.dataset.cifar.train10(), buf_size=128 * 10),
+        paddle.dataset.cifar.train10(), buf_size=BATCH_SIZE * 10),
     batch_size=BATCH_SIZE)
 
+test_reader = paddle.batch(paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)
+
 place = core.CPUPlace()
 exe = Executor(place)
 
@@ -149,10 +152,41 @@ for pass_id in range(PASS_NUM):
         loss = np.array(outs[0])
         acc = np.array(outs[1])
         pass_acc = accuracy.eval(exe)
+
+        batch_id = batch_id + 1
+
+        test_accuracy, test_acc_out = evaluator.accuracy(
+            input=predict, label=label)
+
+        test_target = [avg_cost, test_acc_out] + test_accuracy.states().values()
+        inference_program = get_inference_program(test_target)
+
+        test_accuracy.reset(exe)
+
+        for data in test_reader():
+            x_data = np.array(map(lambda x: x[0].reshape(data_shape),
+                                  data)).astype("float32")
+            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+            y_data = np.expand_dims(y_data, axis=1)
+
+            tensor_x = core.LoDTensor()
+            tensor_x.set(x_data, place)
+
+            tensor_y = core.LoDTensor()
+            tensor_y.set(y_data, place)
+
+            outs = exe.run(inference_program,
+                           feed={'pixel': tensor_x,
+                                 'label': tensor_y},
+                           fetch_list=[avg_cost, test_acc_out])
+            out = np.array(outs[0])
+            acc = np.array(outs[1])
+
+        test_pass_acc = test_accuracy.eval(exe)
+
         print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
               " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
-                  pass_acc))
-        batch_id = batch_id + 1
+                  pass_acc) + " test_pass_acc:" + str(test_pass_acc))
 
         if batch_id > 1:
             # this model is slow, so if we can train two mini batch, we think it works properly.
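Both this CIFAR script and the MNIST script that follows run the same test pass after each training batch: reset the test evaluator, convert every test batch to LoDTensors, run the inference program, and read back the accumulated accuracy. A helper that factors out that loop, as an illustrative sketch only (the feed_names/reshape parameters and the argument order are choices made here, not part of the patch):

    import numpy as np
    import paddle.v2.fluid.core as core

    def evaluate(exe, inference_program, test_reader, test_accuracy, place,
                 feed_names, fetch_list, reshape=None):
        """Run one pass over test_reader and return the accumulated accuracy."""
        test_accuracy.reset(exe)
        for data in test_reader():
            x_data = np.array(map(lambda d: d[0], data)).astype("float32")
            if reshape is not None:
                x_data = x_data.reshape([-1] + list(reshape))  # e.g. data_shape
            y_data = np.array(map(lambda d: d[1], data)).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            tensor_x = core.LoDTensor()
            tensor_x.set(x_data, place)
            tensor_y = core.LoDTensor()
            tensor_y.set(y_data, place)

            exe.run(inference_program,
                    feed={feed_names[0]: tensor_x, feed_names[1]: tensor_y},
                    fetch_list=fetch_list)
        return test_accuracy.eval(exe)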
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
index 03d3881549..f57a5c8d98 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
@@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core
 import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.evaluator as evaluator
+from paddle.v2.fluid.io import get_inference_program
 from paddle.v2.fluid.executor import Executor
 from paddle.v2.fluid.initializer import UniformInitializer
 from paddle.v2.fluid.optimizer import MomentumOptimizer
@@ -42,6 +43,8 @@ train_reader = paddle.batch(
         paddle.dataset.mnist.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
 
+test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
+
 place = core.CPUPlace()
 exe = Executor(place)
 
@@ -69,8 +72,36 @@ for pass_id in range(PASS_NUM):
         acc = np.array(outs[1])
         pass_acc = accuracy.eval(exe)
 
-        if pass_acc > 0.7:
+        test_accuracy, test_acc_out = evaluator.accuracy(
+            input=predict, label=label)
+
+        test_target = [avg_cost, test_acc_out] + test_accuracy.states().values()
+        inference_program = get_inference_program(test_target)
+
+        test_accuracy.reset(exe)
+        for data in test_reader():
+            x_data = np.array(map(lambda x: x[0], data)).astype("float32")
+            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+            y_data = np.expand_dims(y_data, axis=1)
+
+            tensor_x = core.LoDTensor()
+            tensor_x.set(x_data, place)
+
+            tensor_y = core.LoDTensor()
+            tensor_y.set(y_data, place)
+
+            outs = exe.run(inference_program,
+                           feed={'x': tensor_x,
+                                 'y': tensor_y},
+                           fetch_list=[avg_cost, test_acc_out])
+            out = np.array(outs[0])
+            acc = np.array(outs[1])
+
+        test_pass_acc = test_accuracy.eval(exe)
+        print("pass_id=" + str(pass_id) + " train_cost=" + str(
+            out) + " train_acc=" + str(acc) + " train_pass_acc=" + str(pass_acc)
+              + " test_acc=" + str(test_pass_acc))
+
+        if test_pass_acc > 0.7:
             exit(0)
-        # print("pass_id=" + str(pass_id) + " auc=" +
-        #       str(acc) + " pass_acc=" + str(pass_acc))
 exit(1)
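With the evaluate() sketch after the CIFAR test above, the test pass in this MNIST script reduces to a single call (illustrative only; the variables are the ones created in the diff):

    test_pass_acc = evaluate(exe, inference_program, test_reader, test_accuracy,
                             place, feed_names=('x', 'y'),
                             fetch_list=[avg_cost, test_acc_out])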
diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py
index b14a366fca..4f5ea836b4 100644
--- a/python/paddle/v2/fluid/tests/test_dropout_op.py
+++ b/python/paddle/v2/fluid/tests/test_dropout_op.py
@@ -7,7 +7,7 @@ class TestDropoutOp(OpTest):
     def setUp(self):
         self.op_type = "dropout"
         self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
-        self.attrs = {'dropout_prob': 0.0, 'is_training': True}
+        self.attrs = {'dropout_prob': 0.0, 'is_test': False}
         self.outputs = {
             'Out': self.inputs['X'],
             'Mask': np.ones((32, 64)).astype('float32')
@@ -24,7 +24,7 @@ class TestDropoutOp2(TestDropoutOp):
     def setUp(self):
         self.op_type = "dropout"
         self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
-        self.attrs = {'dropout_prob': 1.0, 'is_training': True}
+        self.attrs = {'dropout_prob': 1.0, 'is_test': False}
         self.outputs = {
             'Out': np.zeros((32, 64)).astype('float32'),
             'Mask': np.zeros((32, 64)).astype('float32')
@@ -35,7 +35,7 @@ class TestDropoutOp3(TestDropoutOp):
     def setUp(self):
         self.op_type = "dropout"
         self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")}
-        self.attrs = {'dropout_prob': 0.0, 'is_training': True}
+        self.attrs = {'dropout_prob': 0.0, 'is_test': False}
         self.outputs = {
             'Out': self.inputs['X'],
             'Mask': np.ones((32, 64, 2)).astype('float32')
@@ -46,7 +46,7 @@ class TestDropoutOp4(OpTest):
     def setUp(self):
         self.op_type = "dropout"
         self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
-        self.attrs = {'dropout_prob': 0.35, 'is_training': False}
+        self.attrs = {'dropout_prob': 0.35, 'is_test': True}
         self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
 
     def test_check_output(self):
@@ -57,7 +57,7 @@ class TestDropoutOp5(OpTest):
     def setUp(self):
         self.op_type = "dropout"
         self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")}
-        self.attrs = {'dropout_prob': 0.75, 'is_training': False}
+        self.attrs = {'dropout_prob': 0.75, 'is_test': True}
         self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
 
     def test_check_output(self):
--
GitLab