Enable is_test attr of batch norm and drop out op for test program (#8642)

* fix is_test issue
* add paddle enforce
* fix bug
* add new func
* small fix
* address comments

Enable is_test attr of batch norm and drop out op for test program (#8642)
* fix is_test issue
* add paddle enforce
* fix bug
* add new func
* small fix
* address comments
6720681c · kexinzhao · Yiqun Liu · f45a82be · 6720681c · 6720681c
5 changed files
--- a/paddle/fluid/framework/prune.cc
+++ b/paddle/fluid/framework/prune.cc
@@ -27,8 +27,6 @@ namespace framework {

 const std::string kFeedOpType = "feed";
 const std::string kFetchOpType = "fetch";
-const std::string kDropOutOpType = "dropout";
-const std::string kBatchNormOpType = "batch_norm";

 bool HasDependentVar(const proto::OpDesc& op_desc,
                     const std::set<std::string>& dependent_vars) {
@@ -186,18 +184,13 @@ void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
  prune_impl(input, output, 0, -1, dependent_vars);
 }

-void inference_optimize_impl(const proto::ProgramDesc& input,
-                             proto::ProgramDesc* output, int block_id) {
-  *output = input;
-  auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
+void inference_optimize_impl(proto::ProgramDesc* input, int block_id) {
+  auto* op_field = input->mutable_blocks(block_id)->mutable_ops();
  for (auto& op_desc : *op_field) {
-    if (op_desc.type() == kDropOutOpType ||
-        op_desc.type() == kBatchNormOpType) {
-      for (auto& attr : *op_desc.mutable_attrs()) {
-        if (attr.name() == "is_test") {
-          attr.set_b(true);
-          break;
-        }
+    for (auto& attr : *op_desc.mutable_attrs()) {
+      if (attr.name() == "is_test") {
+        attr.set_b(true);
+        break;
      }
    }
  }
@@ -205,7 +198,12 @@ void inference_optimize_impl(const proto::ProgramDesc& input,

 void InferenceOptimize(const proto::ProgramDesc& input,
                       proto::ProgramDesc* output) {
-  inference_optimize_impl(input, output, 0);
+  *output = input;
+  int num_blocks = output->blocks_size();
+  PADDLE_ENFORCE_GT(num_blocks, 0, "ProgramDesc must have at least one block");
+  for (int i = 0; i < num_blocks; ++i) {
+    inference_optimize_impl(output, i);
+  }
 }

 }  // namespace framework

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -956,9 +956,26 @@ class Program(object):
    def get_desc(self):
        return self.desc

-    def clone(self):
+    def clone(self, for_test=False):
+        """Clone the Program object
+
+        Set for_test to False when we want to clone the program for training.
+        Set for_test to True when we want to clone the program for testing.         
+
+        Args:
+            for_test(bool): Some operators, such as batch_norm and drop_out ops,
+                behave differently in training and testing. If for_test is True,
+                the is_test attributes in these operators will be set to True for
+                testing purposes, otherwise, they remain unchanged.  
+                
+        Returns(Program):
+            The cloned Program object.
+        """
        p = Program()
-        p.desc = core.ProgramDesc(self.desc)
+        if for_test:
+            p.desc = core.inference_optimize(self.desc)
+        else:
+            p.desc = core.ProgramDesc(self.desc)
        p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())]
        p.sync_with_cpp()
        p.copy_param_info_from(self)

--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -115,7 +115,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program 
-    test_program = fluid.default_main_program().clone()
+    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimize_ops, params_grads = optimizer.minimize(avg_cost)

--- a/python/paddle/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/fluid/tests/book/test_recognize_digits.py
@@ -92,7 +92,7 @@ def train(nn_type,
    else:
        prediction, avg_loss, acc = net_conf(img, label)

-    test_program = fluid.default_main_program().clone()
+    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimize_ops, params_grads = optimizer.minimize(avg_loss)

--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -157,7 +157,7 @@ def train(use_cuda, save_dirname, is_local=True):
    scale_infer, avg_cost = model()

    # test program
-    test_program = fluid.default_main_program().clone()
+    test_program = fluid.default_main_program().clone(for_test=True)

    sgd_optimizer = SGDOptimizer(learning_rate=0.2)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)