Unverified commit 6df93364, authored by Ruibiao Chen, committed by GitHub

Enable startup program for standalone executor (#45314)

* Enable startup program for standalone executor

* Disable test_py_reader_using_executor

* Fix test_parallel_executor_mnist

* Fix CI errors

* Fix CI errors
Parent 23bc0e3c
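The executor.py change below deletes the `_is_start_up_program_` guard, so startup programs are now also routed through the standalone executor (InterpreterCore). A minimal sketch of the user-visible path, assuming a Paddle build of this era with the standalone executor enabled (it was gated by the FLAGS_USE_STANDALONE_EXECUTOR environment variable; that flag name is an assumption here, not part of this diff):

# Hedged sketch: after this commit, exe.run(startup_prog) is eligible for
# the InterpreterCore path instead of always falling back to the legacy executor.
import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
    y = paddle.static.nn.fc(x, size=2)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)  # parameter initialization, previously excluded from this path
out, = exe.run(main_prog,
               feed={'x': np.random.rand(8, 4).astype('float32')},
               fetch_list=[y])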
paddle/fluid/framework/new_executor/interpretercore.cc
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"
 #include "paddle/fluid/framework/new_executor/interpretercore_util.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/fluid/platform/profiler/supplement_tracing.h"
@@ -28,7 +29,6 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"

 PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace,
                             false,
@@ -104,7 +104,7 @@ InterpreterCore::~InterpreterCore() {
 interpreter::CostInfo InterpreterCore::DryRun(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
@@ -138,14 +138,16 @@ interpreter::CostInfo InterpreterCore::DryRun(
 paddle::framework::FetchList InterpreterCore::Run(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
 #endif
+
 #ifdef PADDLE_WITH_MKLDNN
   platform::AttachPointerHashToMKLDNNKey(this, place_);
 #endif
+
   bool is_build = is_build_;
   Prepare(feed_names, feed_tensors, is_build);
@@ -180,14 +182,16 @@ paddle::framework::FetchList InterpreterCore::Run(
 paddle::framework::FetchList InterpreterCore::Run(
     const std::vector<std::string>& feed_names) {
-#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_)) {
     platform::SetDeviceId(place_.device);
   }
 #endif
+
 #ifdef PADDLE_WITH_MKLDNN
   platform::AttachPointerHashToMKLDNNKey(this, place_);
 #endif
+
   if (!is_build_) {
     paddle::framework::interpreter::build_variable_scope(
         block_, &var_scope_, create_local_scope_);
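In all three hunks above, the guard changes from `defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)` to `defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)`: any CUDA or ROCm build now calls platform::SetDeviceId(place_.device) before a run, not only HeterPS builds, so work is issued on the GPU the executor was constructed for even if the calling thread's current device is a different one. A hedged Python-side illustration (assumes a multi-GPU CUDA build):

# The place handed to the executor carries the device id that the C++ change
# above pins via SetDeviceId before DryRun/Run execute.
import paddle

paddle.enable_static()
place = paddle.CUDAPlace(1)          # target GPU 1 explicitly
exe = paddle.static.Executor(place)  # InterpreterCore keeps place_ = CUDAPlace(1)
exe.run(paddle.static.default_startup_program())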
python/paddle/fluid/executor.py
@@ -1615,9 +1615,7 @@ class Executor(object):
         # use StandaloneExecutor to run the program.
         if return_merged and self._enable_interpreter_core and _can_use_interpreter_core(
                 program, self.place):
-            inner_program = program._program if isinstance(
-                program, compiler.CompiledProgram) else program
-            if not inner_program._is_start_up_program_:
             if feed is None:
                 feed = {}
             elif isinstance(feed, (list, tuple)):
@@ -1641,12 +1639,9 @@ class Executor(object):
             lr_sheduler = program.lr_sheduler
             lr_value = lr_sheduler()
             lr_var = program.global_block().vars[lr_sheduler._var_name]
-            data = np.array([lr_value
-                             ]).astype(convert_dtype(lr_var.dtype))
-            tensor = core.get_variable_tensor(scope,
-                                              lr_sheduler._var_name)
-            # NOTE(dev): `set` always call TensorCopySync that is a
-            # blocking behavior. So we use `_copy_from` to replace it.
+            data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype))
+            tensor = core.get_variable_tensor(scope, lr_sheduler._var_name)
+            # NOTE(dev): `tensor.set(data, self.place)` always call TensorCopySync that is a blocking behavior. So we use `_copy_from` to replace it.
             cpu_tensor = _as_lodtensor(data, core.CPUPlace())
             # for ipu, tensor is allocated on cpu
             if core.is_compiled_with_ipu():
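The reflowed executor.py lines keep the same logic: the scheduler's new learning-rate value is staged in a CPU tensor and copied to the target place with `_copy_from`, avoiding the blocking TensorCopySync that `tensor.set(data, place)` would trigger. A hedged sketch of that pattern; `_as_lodtensor` and `_copy_from` are the Paddle-internal helpers the diff itself uses, and `lr_value`, `scope`, `lr_var_name`, and `place` stand in for values computed earlier in Executor.run:

# Non-blocking learning-rate update, per the NOTE(dev) comment above.
data = np.array([lr_value]).astype('float32')      # new LR as a 1-element array
tensor = core.get_variable_tensor(scope, lr_var_name)
cpu_tensor = _as_lodtensor(data, core.CPUPlace())  # stage the value on CPU
tensor._copy_from(cpu_tensor, place)               # async copy instead of TensorCopySync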
python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
@@ -95,14 +95,11 @@ def simple_fc_net(in_size,
         py_reader = fluid.layers.create_py_reader_by_data(
             capacity=queue_capacity,
             use_double_buffer=use_double_buffer,
-            feed_list=[in_data, label],
-            name=unique_name.generate('py_reader_name'))
+            feed_list=[in_data, label])
     else:
-        py_reader = fluid.layers.py_reader(
-            capacity=queue_capacity,
+        py_reader = fluid.layers.py_reader(capacity=queue_capacity,
             shapes=[in_data.shape, label.shape],
             dtypes=['float32', 'int64'],
-            name=unique_name.generate('py_reader_name'),
             use_double_buffer=use_double_buffer)
     in_data, label = fluid.layers.read_file(py_reader)
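For reference, fluid.layers.py_reader creates a queue-backed reader that fluid.layers.read_file unpacks into input variables; dropping the explicit name=unique_name.generate(...) arguments lets each reader take an auto-generated default name. A minimal hedged sketch of the legacy-fluid pattern this test exercises:

import paddle
import paddle.fluid as fluid

paddle.enable_static()
py_reader = fluid.layers.py_reader(capacity=64,
                                   shapes=[[-1, 32], [-1, 1]],
                                   dtypes=['float32', 'int64'],
                                   use_double_buffer=True)
in_data, label = fluid.layers.read_file(py_reader)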