Unverified commit 6df93364, authored by Ruibiao Chen, committed by GitHub

Enable startup program for standalone executor (#45314)

* Enable startup program for standalone executor

* Disable test_py_reader_using_executor

* Fix test_parallel_executor_mnist

* Fix CI errors

* Fix CI errors
Parent 23bc0e3c
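In user terms, the change lets `Executor.run()` send a startup `Program` through the standalone (new) executor when `FLAGS_USE_STANDALONE_EXECUTOR` is on, instead of falling back to the legacy path. A minimal hedged sketch using Paddle's public static-graph API (the flag name comes from the warning string in the diff below; exact behavior depends on the build):

```python
# Minimal sketch, assuming a Paddle build from around this commit.
# The flag is read during executor setup, so set it before creating
# the Executor.
import os
os.environ['FLAGS_USE_STANDALONE_EXECUTOR'] = '1'

import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
    y = paddle.static.nn.fc(x, size=2)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)  # with this commit, also served by the standalone executor
```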
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"
 #include "paddle/fluid/framework/new_executor/interpretercore_util.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/fluid/platform/profiler/supplement_tracing.h"
@@ -28,7 +29,6 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 
 PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace,
                             false,
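The hunk above only moves the `gpu_info.h` include so it sorts with the other platform headers; the exported flag it sits next to, `FLAGS_new_executor_use_inplace`, is unchanged (its description string is cut off by the diff view). As a hedged aside, flags declared with `PADDLE_DEFINE_EXPORTED_bool` are normally reachable from Python:

```python
# Hedged sketch: toggling an exported gflag from Python. Whether this
# particular flag has any effect depends on how your wheel was built.
import paddle

paddle.set_flags({'FLAGS_new_executor_use_inplace': True})
print(paddle.get_flags(['FLAGS_new_executor_use_inplace']))
```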
@@ -104,7 +104,7 @@ InterpreterCore::~InterpreterCore() {
 interpreter::CostInfo InterpreterCore::DryRun(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
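The guard change recurs in both `Run()` overloads below: `SetDeviceId` used to compile only in CUDA-with-HETERPS builds and now compiles in any CUDA or ROCm (HIP) build, so the interpreter rebinds the active device whenever `place_` is a GPU place. A hedged user-side illustration of why that matters:

```python
# Hedged sketch (assumes a CUDA/ROCm wheel and at least two devices):
# an Executor pinned to the second GPU relies on the interpreter
# calling SetDeviceId(place.device) before launching any kernels.
import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CUDAPlace(1))  # device id 1, not the default 0
```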
@@ -138,14 +138,16 @@ interpreter::CostInfo InterpreterCore::DryRun(
 paddle::framework::FetchList InterpreterCore::Run(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
 #endif
+#ifdef PADDLE_WITH_MKLDNN
+  platform::AttachPointerHashToMKLDNNKey(this, place_);
+#endif
 
   bool is_build = is_build_;
   Prepare(feed_names, feed_tensors, is_build);
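Judging by its name, `AttachPointerHashToMKLDNNKey(this, place_)` folds the interpreter's own address into the oneDNN cache key, so cached primitives from different executor instances don't collide (a hedged reading; the helper lives in `mkldnn_helper.h`). From Python, oneDNN execution on CPU is governed by a flag:

```python
# Hedged sketch: requires a CPU wheel built with oneDNN (MKLDNN) support.
import paddle

paddle.enable_static()
paddle.set_flags({'FLAGS_use_mkldnn': True})  # route supported CPU ops to oneDNN kernels
```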
@@ -180,14 +182,16 @@ paddle::framework::FetchList InterpreterCore::Run(
 paddle::framework::FetchList InterpreterCore::Run(
     const std::vector<std::string>& feed_names) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
 #endif
+#ifdef PADDLE_WITH_MKLDNN
+  platform::AttachPointerHashToMKLDNNKey(this, place_);
+#endif
 
   if (!is_build_) {
     paddle::framework::interpreter::build_variable_scope(
         block_, &var_scope_, create_local_scope_);
...
@@ -1615,51 +1615,46 @@ class Executor(object):
         # use StandaloneExecutor to run the program.
         if return_merged and self._enable_interpreter_core and _can_use_interpreter_core(
                 program, self.place):
-            inner_program = program._program if isinstance(
-                program, compiler.CompiledProgram) else program
-            if not inner_program._is_start_up_program_:
-                if feed is None:
-                    feed = {}
-                elif isinstance(feed, (list, tuple)):
-                    assert len(feed) == 1, "Not compiled with data parallel"
-                    feed = feed[0]
-                if not isinstance(feed, dict):
-                    raise TypeError(
-                        "feed requires dict as its Parameter. But you passed in %s"
-                        % (type(feed)))
-                feed = self._update_feed(program, feed)
-                program, new_exe = self._executor_cache.get_program_and_executor(
-                    program, feed, fetch_list, feed_var_name, fetch_var_name,
-                    self.place, scope)
-                self._feed_data(program, feed, feed_var_name, scope)
-                if hasattr(program, 'lr_sheduler'):
-                    from paddle.optimizer.lr import LRScheduler
-                    assert isinstance(program.lr_sheduler,
-                                      LRScheduler), "must be LRScheduler"
-                    lr_sheduler = program.lr_sheduler
-                    lr_value = lr_sheduler()
-                    lr_var = program.global_block().vars[lr_sheduler._var_name]
-                    data = np.array([lr_value
-                                     ]).astype(convert_dtype(lr_var.dtype))
-                    tensor = core.get_variable_tensor(scope,
-                                                      lr_sheduler._var_name)
-                    # NOTE(dev): `set` always call TensorCopySync that is a
-                    # blocking behavior. So we use `_copy_from` to replace it.
-                    cpu_tensor = _as_lodtensor(data, core.CPUPlace())
-                    # for ipu, tensor is allocated on cpu
-                    if core.is_compiled_with_ipu():
-                        tensor._copy_from(cpu_tensor, tensor._place())
-                    else:
-                        tensor._copy_from(cpu_tensor, self.place)
-                warnings.warn(
-                    "FLAGS_USE_STANDALONE_EXECUTOR is set to 1. New executor is used to execute Program."
-                )
+            if feed is None:
+                feed = {}
+            elif isinstance(feed, (list, tuple)):
+                assert len(feed) == 1, "Not compiled with data parallel"
+                feed = feed[0]
+            if not isinstance(feed, dict):
+                raise TypeError(
+                    "feed requires dict as its Parameter. But you passed in %s"
+                    % (type(feed)))
+            feed = self._update_feed(program, feed)
+            program, new_exe = self._executor_cache.get_program_and_executor(
+                program, feed, fetch_list, feed_var_name, fetch_var_name,
+                self.place, scope)
+            self._feed_data(program, feed, feed_var_name, scope)
+            if hasattr(program, 'lr_sheduler'):
+                from paddle.optimizer.lr import LRScheduler
+                assert isinstance(program.lr_sheduler,
+                                  LRScheduler), "must be LRScheduler"
+                lr_sheduler = program.lr_sheduler
+                lr_value = lr_sheduler()
+                lr_var = program.global_block().vars[lr_sheduler._var_name]
+                data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
+                tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+                # NOTE(dev): `tensor.set(data, self.place)` always call TensorCopySync that is a blocking behavior. So we use `_copy_from` to replace it.
+                cpu_tensor = _as_lodtensor(data, core.CPUPlace())
+                # for ipu, tensor is allocated on cpu
+                if core.is_compiled_with_ipu():
+                    tensor._copy_from(cpu_tensor, tensor._place())
+                else:
+                    tensor._copy_from(cpu_tensor, self.place)
+            warnings.warn(
+                "FLAGS_USE_STANDALONE_EXECUTOR is set to 1. New executor is used to execute Program."
+            )
 
-                return new_exe.run(scope, list(feed.keys()), fetch_list,
-                                   return_numpy)
+            return new_exe.run(scope, list(feed.keys()), fetch_list,
+                               return_numpy)
 
         compiled = isinstance(program, compiler.CompiledProgram)
...
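The Python hunk is the heart of the commit: the `if not inner_program._is_start_up_program_:` wrapper is deleted, so startup programs take the standalone-executor path like any other program. The `lr_sheduler` branch it dedents (the misspelling is the attribute's actual name in the codebase) copies the scheduler's current value into the scope via `_copy_from` rather than the blocking `TensorCopySync`. A hedged sketch of the user-side pattern that exercises it:

```python
# Minimal sketch, assuming Paddle's public static-graph API; minimize()
# attaches the scheduler to the program as `lr_sheduler`, which
# Executor.run() then syncs as shown in the diff above.
import numpy as np
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 1], dtype='float32')
    loss = paddle.mean(paddle.static.nn.fc(x, size=1))
    sched = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=10)
    paddle.optimizer.SGD(learning_rate=sched).minimize(loss)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
exe.run(main_prog,
        feed={'x': np.ones((1, 1), dtype='float32')},
        fetch_list=[loss])
sched.step()  # the next run() picks up the updated learning rate
```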
@@ -95,15 +95,12 @@ def simple_fc_net(in_size,
         py_reader = fluid.layers.create_py_reader_by_data(
             capacity=queue_capacity,
             use_double_buffer=use_double_buffer,
-            feed_list=[in_data, label],
-            name=unique_name.generate('py_reader_name'))
+            feed_list=[in_data, label])
     else:
-        py_reader = fluid.layers.py_reader(
-            capacity=queue_capacity,
-            shapes=[in_data.shape, label.shape],
-            dtypes=['float32', 'int64'],
-            name=unique_name.generate('py_reader_name'),
-            use_double_buffer=use_double_buffer)
+        py_reader = fluid.layers.py_reader(capacity=queue_capacity,
+                                           shapes=[in_data.shape, label.shape],
+                                           dtypes=['float32', 'int64'],
+                                           use_double_buffer=use_double_buffer)
     in_data, label = fluid.layers.read_file(py_reader)
...
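Both reader constructions in this test hunk drop their explicit `name=unique_name.generate('py_reader_name')` argument and fall back to the layer's default naming; per the commit message this accompanies disabling `test_py_reader_using_executor` and fixing the remaining reader tests under the new executor. For context, a hedged sketch of the legacy pattern the test builds:

```python
# Hedged sketch of the legacy fluid 1.x reader API this test exercises;
# it only runs on old Paddle versions that still ship fluid.layers.py_reader
# and friends.
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[32], dtype='float32')
lbl = fluid.layers.data(name='lbl', shape=[1], dtype='int64')
reader = fluid.layers.create_py_reader_by_data(capacity=64,
                                               use_double_buffer=True,
                                               feed_list=[img, lbl])
img, lbl = fluid.layers.read_file(reader)
```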