Merge pull request #10741 from jacquesqiao/inferencer-support-multi-gpu

Inferencer support parallel_executor

Merge pull request #10741 from jacquesqiao/inferencer-support-multi-gpu
Inferencer support parallel_executor
54ae8e45 · Qiao Longfei · GitHub · 67b8a300 · d2d671e3 · 54ae8e45
6 changed files
--- a/python/paddle/fluid/inferencer.py
+++ b/python/paddle/fluid/inferencer.py
@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import contextlib
+
 import core

 import executor
 import framework
 import io
+import parallel_executor
 import unique_name
 from trainer import check_and_get_place

@@ -24,40 +27,53 @@ __all__ = ['Inferencer', ]


 class Inferencer(object):
-    def __init__(self, infer_func, param_path, place=None):
+    def __init__(self, infer_func, param_path, place=None, parallel=False):
        """
        :param infer_func: a function that will return predict Variable
        :param param_path: the path where the inference model is saved by fluid.io.save_params
        :param place: place to do the inference
+        :param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
        """
        self.param_path = param_path
        self.scope = core.Scope()
+        self.parallel = parallel
+        self.place = check_and_get_place(place)

        self.inference_program = framework.Program()
        with framework.program_guard(self.inference_program):
            with unique_name.guard():
                self.predict_var = infer_func()

-        self.exe = executor.Executor(check_and_get_place(place))
-        with executor.scope_guard(self.scope):
+        with self._prog_and_scope_guard():
            # load params from param_path into scope
-            io.load_params(self.exe, param_path, self.inference_program)
+            io.load_params(executor.Executor(self.place), param_path)
+
+        if parallel:
+            with self._prog_and_scope_guard():
+                self.exe = parallel_executor.ParallelExecutor(
+                    use_cuda=isinstance(self.place, core.CUDAPlace),
+                    loss_name=self.predict_var.name)
+        else:
+            self.exe = executor.Executor(self.place)

-    def infer(self, inputs, return_numpy=True):
+    def infer(self, inputs):
        """
        :param inputs: a map of {"input_name": input_var} that will be feed into the inference program
        to get the predict value
-        :param return_numpy: if return numpy value for row tensor
        :return: the predict value of the inference model
        """
        if not isinstance(inputs, dict):
            raise ValueError(
                "inputs should be a map of {'input_name': input_var}")

-        with executor.scope_guard(self.scope):
-            results = self.exe.run(self.inference_program,
-                                   feed=inputs,
-                                   fetch_list=[self.predict_var],
-                                   return_numpy=return_numpy)
+        with self._prog_and_scope_guard():
+            results = self.exe.run(feed=inputs,
+                                   fetch_list=[self.predict_var.name])

        return results
+
+    @contextlib.contextmanager
+    def _prog_and_scope_guard(self):
+        with framework.program_guard(main_program=self.inference_program):
+            with executor.scope_guard(self.scope):
+                yield
--- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
@@ -94,7 +94,7 @@ def infer(use_cuda, inference_program, save_dirname=None):
    tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")

    results = inferencer.infer({'x': tensor_x})
-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))


 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
@@ -112,7 +112,7 @@ def infer(use_cuda, inference_program, save_dirname=None):

    results = inferencer.infer({'img': tensor_img})

-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))


 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
@@ -93,7 +93,7 @@ def infer(use_cuda, inference_program, save_dirname=None):

    results = inferencer.infer({'img': tensor_img})

-    print("infer results: ", results[0])
+    print("infer results: ", numpy.array(results[0]))


 def main(use_cuda):

--- a/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
@@ -127,14 +127,12 @@ def infer(use_cuda, inference_program, save_path):
    third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
    fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)

-    result = inferencer.infer(
-        {
+    result = inferencer.infer({
        'firstw': first_word,
        'secondw': second_word,
        'thirdw': third_word,
        'forthw': fourth_word
-        },
-        return_numpy=False)
+    })
    print(np.array(result[0]))



--- a/python/paddle/fluid/trainer.py
+++ b/python/paddle/fluid/trainer.py
@@ -12,18 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import contextlib
 import os
+
 import core
-import framework
-import executor
+
 import data_feeder
-import contextlib
+import executor
+import framework
 import io
-import unique_name
-import parallel_executor
-
 # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
 import optimizer as opt_module
+import parallel_executor
 from transpiler import distribute_transpiler

 __all__ = [