Merge pull request #10741 from jacquesqiao/inferencer-support-multi-gpu

Inferencer support parallel_executor

Merge pull request #10741 from jacquesqiao/inferencer-support-multi-gpu
Inferencer support parallel_executor
54ae8e45 · Qiao Longfei · GitHub · 67b8a300 · d2d671e3 · 54ae8e45
6 changed files
--- a/python/paddle/fluid/inferencer.py
+++ b/python/paddle/fluid/inferencer.py
@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import contextlib
 import core
 import executor
 import framework
 import io
+import parallel_executor
 import unique_name
 from trainer import check_and_get_place
@@ -24,40 +27,53 @@ __all__ = ['Inferencer', ]
 class Inferencer(object):
-    def __init__(self, infer_func, param_path, place=None):
+    def __init__(self, infer_func, param_path, place=None, parallel=False):
        """
        :param infer_func: a function that will return predict Variable
        :param param_path: the path where the inference model is saved by fluid.io.save_params
        :param place: place to do the inference
+        :param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
        """
        self.param_path = param_path
        self.scope = core.Scope()
+        self.parallel = parallel
+        self.place = check_and_get_place(place)
        self.inference_program = framework.Program()
        with framework.program_guard(self.inference_program):
            with unique_name.guard():
                self.predict_var = infer_func()
-        self.exe = executor.Executor(check_and_get_place(place))
+        with self._prog_and_scope_guard():
-        with executor.scope_guard(self.scope):
            # load params from param_path into scope
-            io.load_params(self.exe, param_path, self.inference_program)
+            io.load_params(executor.Executor(self.place), param_path)
+        if parallel:
+            with self._prog_and_scope_guard():
+                self.exe = parallel_executor.ParallelExecutor(
+                    use_cuda=isinstance(self.place, core.CUDAPlace),
+                    loss_name=self.predict_var.name)
+        else:
+            self.exe = executor.Executor(self.place)
-    def infer(self, inputs, return_numpy=True):
+    def infer(self, inputs):
        """
        :param inputs: a map of {"input_name": input_var} that will be feed into the inference program
        to get the predict value
-        :param return_numpy: if return numpy value for row tensor
        :return: the predict value of the inference model
        """
        if not isinstance(inputs, dict):
            raise ValueError(
                "inputs should be a map of {'input_name': input_var}")
-        with executor.scope_guard(self.scope):
+        with self._prog_and_scope_guard():
-            results = self.exe.run(self.inference_program,
+            results = self.exe.run(feed=inputs,
-                                   feed=inputs,
+                                   fetch_list=[self.predict_var.name])
-                                   fetch_list=[self.predict_var],
-                                   return_numpy=return_numpy)
        return results
+    @contextlib.contextmanager
+    def _prog_and_scope_guard(self):
+        with framework.program_guard(main_program=self.inference_program):
+            with executor.scope_guard(self.scope):
+                yield
--- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
@@ -94,7 +94,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
    tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
    results = inferencer.infer({'x': tensor_x})
-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))
 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
@@ -112,7 +112,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
    results = inferencer.infer({'img': tensor_img})
-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))
 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
@@ -93,7 +93,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
    results = inferencer.infer({'img': tensor_img})
-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))
 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
@@ -127,14 +127,12 @@ def infer(use_cuda, inference_program, save_path):
    third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
    fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
-    result = inferencer.infer(
+    result = inferencer.infer({
-        {
+        'firstw': first_word,
-            'firstw': first_word,
+        'secondw': second_word,
-            'secondw': second_word,
+        'thirdw': third_word,
-            'thirdw': third_word,
+        'forthw': fourth_word
-            'forthw': fourth_word
+    })
-        },
-        return_numpy=False)
    print(np.array(result[0]))

--- a/python/paddle/fluid/trainer.py
+++ b/python/paddle/fluid/trainer.py
@@ -12,18 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import contextlib
 import os
 import core
-import framework
-import executor
 import data_feeder
-import contextlib
+import executor
+import framework
 import io
-import unique_name
-import parallel_executor
 # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
 import optimizer as opt_module
+import parallel_executor
 from transpiler import distribute_transpiler
 __all__ = [