Unverified · Commit b5af9575 authored by wanghuancoder, committed by GitHub

fix some bug in new executor (#36822)

* fix some bug in new executor, test=develop

* fix error message, test=develop
Parent be55bac3
@@ -79,12 +79,13 @@ void InterpreterCore::AddFetch(const std::vector<std::string>& fetch_names) {
 }
 
 paddle::framework::FetchList InterpreterCore::Run(
-    const std::vector<framework::Tensor>& feed_tensors) {
+    const std::vector<framework::LoDTensor>& feed_tensors) {
   auto FeedInput = [&] {
     for (size_t i = 0; i < feed_names_.size(); ++i) {
       auto* feed_var = global_scope_->Var(feed_names_[i]);
       auto feed_tensor = feed_var->GetMutable<framework::LoDTensor>();
       feed_tensor->ShareDataWith(feed_tensors[i]);
+      feed_tensor->set_lod(feed_tensors[i].lod());
     }
   };
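The added `set_lod` call matters because `ShareDataWith` only shares the tensor's data allocation; the LoD (level-of-detail, i.e. sequence-offset) metadata of a `LoDTensor` is stored separately and is dropped unless copied explicitly. A minimal sketch of the feed pattern, using a hypothetical `FeedOne` helper name:

```cpp
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"

namespace fw = paddle::framework;

// Hypothetical helper illustrating the FeedInput pattern above.
void FeedOne(fw::Variable* feed_var, const fw::LoDTensor& src) {
  auto* dst = feed_var->GetMutable<fw::LoDTensor>();
  dst->ShareDataWith(src);  // shares only the data buffer
  dst->set_lod(src.lod());  // LoD must be propagated by hand, or sequence
                            // information is silently lost downstream
}
```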
@@ -495,7 +496,7 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
 }
 
 void InterpreterCore::DryRunPrepare(
-    const std::vector<framework::Tensor>& feed_tensors) {
+    const std::vector<framework::LoDTensor>& feed_tensors) {
   auto FeedInput = [&] {
     for (size_t i = 0; i < feed_names_.size(); ++i) {
       auto* feed_var = global_scope_->FindVar(feed_names_[i]);
@@ -504,6 +505,7 @@ void InterpreterCore::DryRunPrepare(
       auto feed_tensor = feed_var->GetMutable<framework::LoDTensor>();
       feed_tensor->ShareDataWith(feed_tensors[i]);
+      feed_tensor->set_lod(feed_tensors[i].lod());
     }
   };
@@ -525,7 +527,7 @@ void InterpreterCore::DryRunPrepare(
 }
 
 const CostInfo& InterpreterCore::DryRun(
-    const std::vector<framework::Tensor>& feed_tensors) {
+    const std::vector<framework::LoDTensor>& feed_tensors) {
   DryRunPrepare(feed_tensors);
   // DryRun may be called many times.
   dry_run_profiler_.Reset();
...
@@ -46,9 +46,9 @@ class InterpreterCore {
       const std::vector<std::string>& fetch_names);
 
   paddle::framework::FetchList Run(
-      const std::vector<framework::Tensor>& feed_tensors);
+      const std::vector<framework::LoDTensor>& feed_tensors);
 
-  const CostInfo& DryRun(const std::vector<framework::Tensor>& feed_tensors);
+  const CostInfo& DryRun(const std::vector<framework::LoDTensor>& feed_tensors);
 
  private:
   void Convert();
@@ -65,7 +65,7 @@ class InterpreterCore {
   void ExecuteInstructionList(const std::vector<Instruction>& vec_instr);
 
-  void DryRunPrepare(const std::vector<framework::Tensor>& feed_tensors);
+  void DryRunPrepare(const std::vector<framework::LoDTensor>& feed_tensors);
 
   void CheckGC(const Instruction& instr);
...
@@ -287,7 +287,7 @@ void build_op_func_list(const platform::Place& place,
       for (size_t i = 0; i < var_name_item.second.size(); ++i) {
         auto var = var_name_item.second[i];
         auto& var_name = inputs_names[var_name_item.first].at(i);
-        auto tensor_in = static_cast<const Tensor*>(&(var->Get<LoDTensor>()));
+        auto tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
         if (!tensor_in->IsInitialized()) {
           continue;
         }
@@ -296,7 +296,9 @@ void build_op_func_list(const platform::Place& place,
             ->GetKernelTypeForVar(var_name_item.first, *tensor_in,
                                   expected_kernel_key);
         if (platform::is_same_place(kernel_type_for_var.place_,
-                                    expected_kernel_key.place_)) {
+                                    expected_kernel_key.place_) ||
+            (is_cuda_pinned_place(kernel_type_for_var.place_) &&
+             is_cpu_place(expected_kernel_key.place_))) {
           // record no need data transformer input var_id
           VLOG(3) << op->Type() << " found no data_transform var: " << var_name
                   << " with id: " << var_name;
...
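Two things change in this hunk: inputs holding `SelectedRows` are no longer force-cast as `LoDTensor`, and a CUDA-pinned input feeding a CPU kernel is no longer scheduled for a data transform, since pinned (page-locked) host memory is directly addressable by CPU code. A minimal sketch of the resulting predicate, under a hypothetical `NeedsDataTransform` name:

```cpp
#include "paddle/fluid/platform/place.h"

namespace plat = paddle::platform;

// Hypothetical helper mirroring the condition in build_op_func_list above.
bool NeedsDataTransform(const plat::Place& var_place,
                        const plat::Place& kernel_place) {
  if (plat::is_same_place(var_place, kernel_place)) return false;
  // CUDA pinned memory lives on the host, so a CPU kernel can read it in
  // place; no memcpy_d2h is required.
  if (plat::is_cuda_pinned_place(var_place) && plat::is_cpu_place(kernel_place))
    return false;
  return true;
}
```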
@@ -47,7 +47,7 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
 
 paddle::framework::FetchList StandaloneExecutor::Run(
     const std::vector<std::string>& feed_names,
-    const std::vector<framework::Tensor>& feed_tensors,
+    const std::vector<framework::LoDTensor>& feed_tensors,
     const std::vector<std::string>& fetch_names) {
   auto core = GetInterpreterCore(feed_names, fetch_names);
@@ -56,7 +56,7 @@ paddle::framework::FetchList StandaloneExecutor::Run(
 const CostInfo& StandaloneExecutor::DryRun(
     const std::vector<std::string>& feed_names,
-    const std::vector<framework::Tensor>& feed_tensors) {
+    const std::vector<framework::LoDTensor>& feed_tensors) {
   auto core = GetInterpreterCore(feed_names, {});
 
   auto& cost_info = core->DryRun(feed_tensors);
...
@@ -28,7 +28,7 @@ class ExecutorBase {
   virtual ~ExecutorBase() {}
   virtual paddle::framework::FetchList Run(
       const std::vector<std::string>& feed_names,
-      const std::vector<framework::Tensor>& feed_tensors,
+      const std::vector<framework::LoDTensor>& feed_tensors,
       const std::vector<std::string>& fetch_names) = 0;
 };
@@ -42,11 +42,11 @@ class StandaloneExecutor : public ExecutorBase {
   virtual paddle::framework::FetchList Run(
       const std::vector<std::string>& feed_names,
-      const std::vector<framework::Tensor>& feed_tensors,
+      const std::vector<framework::LoDTensor>& feed_tensors,
       const std::vector<std::string>& fetch_names);
 
   const CostInfo& DryRun(const std::vector<std::string>& feed_names,
-                         const std::vector<framework::Tensor>& feed_tensors);
+                         const std::vector<framework::LoDTensor>& feed_tensors);
 
  private:
   void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
...
@@ -128,9 +128,12 @@ class FetchV2Kernel {
     if (fetch_var->IsType<framework::LoDTensor>()) {
       auto &src_item = fetch_var->Get<framework::LoDTensor>();
       auto *dst_item = &(BOOST_GET(framework::LoDTensor, fetch_list->at(col)));
-      PADDLE_ENFORCE_EQ(platform::is_cpu_place(src_item.place()), true,
-                        platform::errors::InvalidArgument(
-                            "Tensor's place of input(X) must be CPUPlace."));
+      bool check_place = platform::is_cpu_place(src_item.place()) ||
+                         platform::is_cuda_pinned_place(src_item.place());
+      PADDLE_ENFORCE_EQ(
+          check_place, true,
+          platform::errors::InvalidArgument("Tensor's place of input(X) must "
+                                            "be CPUPlace or CUDAPinnedPlace."));
       if (deepcopy) {
         DeepCopy(src_item, fetch_var_name, dst_item);
       } else {
@@ -188,8 +191,11 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL_FUNCTOR(fetch_v2, float, ops::FetchV2Kernel, double,
-                               ops::FetchV2Kernel, int, ops::FetchV2Kernel,
-                               int64_t, ops::FetchV2Kernel, bool,
-                               ops::FetchV2Kernel, plat::float16,
-                               ops::FetchV2Kernel);
+REGISTER_OP_CPU_KERNEL_FUNCTOR(
+    fetch_v2, float, ops::FetchV2Kernel, double, ops::FetchV2Kernel, int8_t,
+    ops::FetchV2Kernel, uint8_t, ops::FetchV2Kernel, int, ops::FetchV2Kernel,
+    int64_t, ops::FetchV2Kernel, bool, ops::FetchV2Kernel,
+    paddle::platform::bfloat16, ops::FetchV2Kernel,
+    paddle::platform::complex<float>, ops::FetchV2Kernel,
+    paddle::platform::complex<double>, ops::FetchV2Kernel, plat::float16,
+    ops::FetchV2Kernel, int16_t, ops::FetchV2Kernel);
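`REGISTER_OP_*_KERNEL_FUNCTOR` takes the op name followed by alternating (data type, kernel functor) pairs, so widening the lists here and in the `memcpy_*` ops below is what lets the new executor fetch and copy int8/uint8/int16, bfloat16, and complex tensors. A hedged sketch of the pattern, using a hypothetical `my_op` and `ops::MyOpKernel`:

```cpp
// Hypothetical registration showing the (dtype, functor) pairing; each
// pair instantiates one kernel specialization for the op.
REGISTER_OP_CPU_KERNEL_FUNCTOR(my_op,                     // op name
                               float, ops::MyOpKernel,    // float kernel
                               int16_t, ops::MyOpKernel,  // int16 kernel
                               paddle::platform::bfloat16,
                               ops::MyOpKernel);          // bf16 kernel
```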
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
-                               ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel,
-                               int64_t, ops::MemcpyD2HKernel, bool,
-                               ops::MemcpyD2HKernel, plat::float16,
-                               ops::MemcpyD2HKernel);
+REGISTER_OP_CPU_KERNEL_FUNCTOR(
+    memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel,
+    int8_t, ops::MemcpyD2HKernel, uint8_t, ops::MemcpyD2HKernel, int,
+    ops::MemcpyD2HKernel, int64_t, ops::MemcpyD2HKernel, bool,
+    ops::MemcpyD2HKernel, paddle::platform::bfloat16, ops::MemcpyD2HKernel,
+    paddle::platform::complex<float>, ops::MemcpyD2HKernel,
+    paddle::platform::complex<double>, ops::MemcpyD2HKernel, plat::float16,
+    ops::MemcpyD2HKernel, int16_t, ops::MemcpyD2HKernel);
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
-                                ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel,
-                                int64_t, ops::MemcpyD2HKernel, bool,
-                                ops::MemcpyD2HKernel, plat::float16,
-                                ops::MemcpyD2HKernel);
+REGISTER_OP_CUDA_KERNEL_FUNCTOR(
+    memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel,
+    int8_t, ops::MemcpyD2HKernel, uint8_t, ops::MemcpyD2HKernel, int,
+    ops::MemcpyD2HKernel, int64_t, ops::MemcpyD2HKernel, bool,
+    ops::MemcpyD2HKernel, paddle::platform::bfloat16, ops::MemcpyD2HKernel,
+    paddle::platform::complex<float>, ops::MemcpyD2HKernel,
+    paddle::platform::complex<double>, ops::MemcpyD2HKernel, plat::float16,
+    ops::MemcpyD2HKernel, int16_t, ops::MemcpyD2HKernel);
 #endif
 
 #ifdef PADDLE_WITH_ASCEND_CL
-REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
-                               ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel,
-                               int64_t, ops::MemcpyD2HKernel, bool,
-                               ops::MemcpyD2HKernel, plat::float16,
-                               ops::MemcpyD2HKernel);
+REGISTER_OP_NPU_KERNEL_FUNCTOR(
+    memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel,
+    int8_t, ops::MemcpyD2HKernel, uint8_t, ops::MemcpyD2HKernel, int,
+    ops::MemcpyD2HKernel, int64_t, ops::MemcpyD2HKernel, bool,
+    ops::MemcpyD2HKernel, paddle::platform::bfloat16, ops::MemcpyD2HKernel,
+    paddle::platform::complex<float>, ops::MemcpyD2HKernel,
+    paddle::platform::complex<double>, ops::MemcpyD2HKernel, plat::float16,
+    ops::MemcpyD2HKernel, int16_t, ops::MemcpyD2HKernel);
 #endif
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
-                               ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel,
-                               int64_t, ops::MemcpyH2DKernel, bool,
-                               ops::MemcpyH2DKernel, plat::float16,
-                               ops::MemcpyH2DKernel);
+REGISTER_OP_CPU_KERNEL_FUNCTOR(
+    memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel,
+    int8_t, ops::MemcpyH2DKernel, uint8_t, ops::MemcpyH2DKernel, int,
+    ops::MemcpyH2DKernel, int64_t, ops::MemcpyH2DKernel, bool,
+    ops::MemcpyH2DKernel, paddle::platform::bfloat16, ops::MemcpyH2DKernel,
+    paddle::platform::complex<float>, ops::MemcpyH2DKernel,
+    paddle::platform::complex<double>, ops::MemcpyH2DKernel, plat::float16,
+    ops::MemcpyH2DKernel, int16_t, ops::MemcpyH2DKernel);
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
-                                ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel,
-                                int64_t, ops::MemcpyH2DKernel, bool,
-                                ops::MemcpyH2DKernel, plat::float16,
-                                ops::MemcpyH2DKernel);
+REGISTER_OP_CUDA_KERNEL_FUNCTOR(
+    memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel,
+    int8_t, ops::MemcpyH2DKernel, uint8_t, ops::MemcpyH2DKernel, int,
+    ops::MemcpyH2DKernel, int64_t, ops::MemcpyH2DKernel, bool,
+    ops::MemcpyH2DKernel, paddle::platform::bfloat16, ops::MemcpyH2DKernel,
+    paddle::platform::complex<float>, ops::MemcpyH2DKernel,
+    paddle::platform::complex<double>, ops::MemcpyH2DKernel, plat::float16,
+    ops::MemcpyH2DKernel, int16_t, ops::MemcpyH2DKernel);
 #endif
 
 #ifdef PADDLE_WITH_ASCEND_CL
-REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
-                               ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel,
-                               int64_t, ops::MemcpyH2DKernel, bool,
-                               ops::MemcpyH2DKernel, plat::float16,
-                               ops::MemcpyH2DKernel);
+REGISTER_OP_NPU_KERNEL_FUNCTOR(
+    memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel,
+    int8_t, ops::MemcpyH2DKernel, uint8_t, ops::MemcpyH2DKernel, int,
+    ops::MemcpyH2DKernel, int64_t, ops::MemcpyH2DKernel, bool,
+    ops::MemcpyH2DKernel, paddle::platform::bfloat16, ops::MemcpyH2DKernel,
+    paddle::platform::complex<float>, ops::MemcpyH2DKernel,
+    paddle::platform::complex<double>, ops::MemcpyH2DKernel, plat::float16,
+    ops::MemcpyH2DKernel, int16_t, ops::MemcpyH2DKernel);
 #endif
@@ -2046,7 +2046,7 @@ All parameter, weight, gradient are variables in Paddle.
            [](StandaloneExecutor &self,
               const std::unordered_map<std::string, py::array> &input_dict,
               std::vector<std::string> fetch_names) {
-             std::vector<framework::Tensor> feed_tensors;
+             std::vector<framework::LoDTensor> feed_tensors;
              std::vector<std::string> feed_names;
              for (auto &item : input_dict) {
@@ -2066,10 +2066,10 @@ All parameter, weight, gradient are variables in Paddle.
            })
       .def("run",
            [](StandaloneExecutor &self,
-              const std::unordered_map<std::string, framework::Tensor>
+              const std::unordered_map<std::string, framework::LoDTensor>
                   &input_dict,
               std::vector<std::string> fetch_names) {
-             std::vector<framework::Tensor> feed_tensors;
+             std::vector<framework::LoDTensor> feed_tensors;
              std::vector<std::string> feed_names;
              for (auto &item : input_dict) {
@@ -2087,7 +2087,7 @@ All parameter, weight, gradient are variables in Paddle.
       .def("dry_run",
            [](StandaloneExecutor &self,
               const std::unordered_map<std::string, py::array> &input_dict) {
-             std::vector<framework::Tensor> feed_tensors;
+             std::vector<framework::LoDTensor> feed_tensors;
              std::vector<std::string> feed_names;
              for (auto &item : input_dict) {
...
@@ -485,10 +485,11 @@ handler = FetchHandlerExample(var_dict=var_dict)
 class _StandaloneExecutor(object):
-    def __init__(self, place, main_program):
+    def __init__(self, place, main_program, scope):
         self._place = core.Place()
         self._place.set_place(place)
         self._main_program = main_program
+        self._scope = scope
         self._new_exe = self._create_new_executor()
 
     def run(self, feed, fetch_list, return_numpy=True):
@@ -522,9 +523,8 @@ class _StandaloneExecutor(object):
     def _create_new_executor(self):
         # NOTE: It's a trick to set empty start_up program.
         startup_program = Program()
-        outer_scope = global_scope()
         new_exe = core.StandaloneExecutor(self._place, startup_program.desc,
-                                          self._main_program.desc, outer_scope)
+                                          self._main_program.desc, self._scope)
 
         return new_exe
@@ -585,11 +585,11 @@ class _ExecutorCache(object):
         self._place = place
         self._cached_executors = {}
 
-    def run(self, program, feed, fetch_list, return_numpy=True):
-        new_exe = self._get_exe_from_cache(program)
+    def run(self, program, scope, feed, fetch_list, return_numpy=True):
+        new_exe = self._get_exe_from_cache(program, scope)
         return new_exe.run(feed, fetch_list, return_numpy)
 
-    def _get_exe_from_cache(self, program):
+    def _get_exe_from_cache(self, program, scope):
         """
         Return cached _StandaloneExecutor instance. If not found, create associated
         _StandaloneExecutor instance with given program and cache it.
@@ -598,7 +598,7 @@ class _ExecutorCache(object):
             program, Program), "Required type(Program), but received {}".format(
                 type(program).__name__)
         if program not in self._cached_executors:
-            new_exe = _StandaloneExecutor(self._place, program)
+            new_exe = _StandaloneExecutor(self._place, program, scope)
             self._cached_executors[program] = new_exe
 
         return self._cached_executors[program]
@@ -1297,7 +1297,7 @@ class Executor(object):
         # NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `,
         # use StandaloneExecutor to run the program.
         if self._enable_interpreter_core and not program._is_start_up_program_:
-            return self._executor_cache.run(program, feed, fetch_list,
+            return self._executor_cache.run(program, scope, feed, fetch_list,
                                             return_numpy)
 
         # use_prune can be overrided by putting optimize_ops in fetch_list
...
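In short, the Python side now threads the caller's `scope` from `Executor.run` through `_ExecutorCache` into `_StandaloneExecutor`, instead of unconditionally binding `global_scope()` when the executor is created, so a program run under a user-supplied scope sees its own variables rather than the global ones.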