Unverified commit 5c0b60ae authored by Ruibiao Chen, committed by GitHub

Remove DryRun in standalone executor (#54222)

Parent: ea8f1998
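This commit removes the DryRun/dry_run profiling entry points from InterpreterCore, StandaloneExecutor, and the pybind layer, and drops the CreateInterpreterCore helper in favor of constructing InterpreterCore directly. As a hedged illustration of the migration pattern, mirroring the test updates in this diff (place, prog, and scope stand in for caller-provided values):

// Old call sites went through the removed helper, which copied the program
// and appended fetch ops for fetch_names:
//   auto core = CreateInterpreterCore(place, prog, &scope, fetch_names);
// After this commit, callers construct the core directly on block 0:
std::shared_ptr<InterpreterCore> core = std::make_shared<InterpreterCore>(
    place, prog.Block(0), &scope, interpreter::ExecutionConfig());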
...
@@ -158,38 +158,6 @@ InterpreterCore::~InterpreterCore() {
 #endif
 }
-interpreter::CostInfo InterpreterCore::DryRun(
-    const std::vector<std::string>& feed_names,
-    const std::vector<phi::DenseTensor>& feed_tensors) {
-  SetDeviceId(place_);
-  CheckCUDAGraphBeforeRun(feed_names);
-
-  Prepare(feed_names, feed_tensors, true);
-
-  interpreter::CostInfo cost_info;
-  {
-    interpreter::ProfilerGuard(place_, &cost_info);
-
-    // For a program that only runs once, there is no need to create a
-    // work_queue, so the async_work_queue_ is not created until the
-    // second run.
-    async_work_queue_ = GetWorkQueue();
-
-    // Lazy initialization of gc; do not create gc if the program only runs
-    // once.
-    if (!gc_) {
-      gc_ = CreateInterpreterCoreGarbageCollector(place_, vec_instruction_);
-    }
-
-    ExecuteInstructionList(vec_instruction_);
-    platform::DeviceContextPool::Instance().Get(place_)->Wait();
-  }
-
-  if (HasLocalScope()) {
-    ClearLoDTensorArrayInLocalScope();
-  }
-
-  return cost_info;
-}
-
 void InterpreterCore::RunImpl() {
   // Lazy initialization of gc; do not create gc if the program only runs once.
   if (!gc_) {
...
@@ -1540,24 +1508,5 @@ void InterpreterCore::AnalyseExecuteOrderForTrace() {
   trace_execute_order_ = trace_order;
 }
-std::shared_ptr<InterpreterCore> CreateInterpreterCore(
-    const platform::Place& place,
-    const ProgramDesc& prog,
-    Scope* scope,
-    const std::vector<std::string>& fetch_names,
-    const interpreter::ExecutionConfig& execution_config) {
-  std::shared_ptr<InterpreterCore> core = nullptr;
-  // NOTE(Aurelius84): `AddFetch` will modify BlockDesc, so we should make a
-  // copy of the program.
-  auto new_prog = std::make_shared<framework::ProgramDesc>(prog);
-  auto* block = new_prog->MutableBlock(0);
-  interpreter::AddFetch(fetch_names, block);
-
-  core =
-      std::make_shared<InterpreterCore>(place, *block, scope, execution_config);
-  core->SetCopyProgram(new_prog);
-  return core;
-}
-
 }  // namespace framework
 }  // namespace paddle
...
@@ -54,10 +54,6 @@ class InterpreterCore {
   ~InterpreterCore();
-  interpreter::CostInfo DryRun(
-      const std::vector<std::string>& feed_names,
-      const std::vector<phi::DenseTensor>& feed_tensors);
-
   paddle::framework::FetchList Run(
       const std::vector<std::string>& feed_names,
       const std::vector<phi::DenseTensor>& feed_tensors);
...
@@ -190,13 +186,5 @@ class InterpreterCore {
   InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
 };
-std::shared_ptr<InterpreterCore> CreateInterpreterCore(
-    const platform::Place& place,
-    const ProgramDesc& prog,
-    Scope* scope,
-    const std::vector<std::string>& fetch_names = {},
-    const interpreter::ExecutionConfig& execution_config =
-        interpreter::ExecutionConfig());
-
 }  // namespace framework
 }  // namespace paddle
...
@@ -28,27 +28,17 @@ paddle::framework::FetchList StandaloneExecutor::Run(
     const std::vector<std::string>& fetch_names) {
   platform::RecordEvent record_event(
       "StandaloneExecutor::run", platform::TracerEventType::UserDefined, 1);

-  auto core = GetInterpreterCore(scope, prog_, feed_names, fetch_names, false);
+  auto core = GetInterpreterCore(scope, prog_, feed_names, fetch_names);
   VLOG(4) << "StandaloneExecutor: " << this << ", InterpreterCore: " << core;
   return core->Run(feed_names);
 }
-framework::interpreter::CostInfo StandaloneExecutor::DryRun(
-    Scope* scope,
-    const std::vector<std::string>& feed_names,
-    const std::vector<phi::DenseTensor>& feed_tensors) {
-  auto core = GetInterpreterCore(scope, prog_, feed_names, {}, true);
-
-  return core->DryRun(feed_names, feed_tensors);
-}
-
 std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
     Scope* scope,
     const ProgramDesc& prog,
     const std::vector<std::string>& feed_names,
-    const std::vector<std::string>& fetch_names,
-    bool add_fetch_op) {
+    const std::vector<std::string>& fetch_names) {
   std::ostringstream oss;
   oss << "feed:";
   for (auto& feedname : feed_names) {
...
@@ -65,14 +55,8 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
   if (iter == interpretercores_.end()) {
     VLOG(3) << "create interpreter_core for " << oss.str() << " on place "
             << place_;
-    VLOG(3) << "add fetch op: " << add_fetch_op;
-    std::shared_ptr<InterpreterCore> core = nullptr;
-    if (add_fetch_op) {
-      core = CreateInterpreterCore(place_, prog, scope, fetch_names);
-    } else {
-      core = std::make_shared<InterpreterCore>(place_, prog.Block(0), scope);
-    }
+    std::shared_ptr<InterpreterCore> core =
+        std::make_shared<InterpreterCore>(place_, prog.Block(0), scope);
     interpretercores_.emplace(oss.str(), core);
     return core;
   } else {
...
...
@@ -42,18 +42,12 @@ class StandaloneExecutor {
       const std::vector<std::string>& feed_names,
       const std::vector<std::string>& fetch_names);
-  framework::interpreter::CostInfo DryRun(
-      Scope* scope,
-      const std::vector<std::string>& feed_names,
-      const std::vector<phi::DenseTensor>& feed_tensors);
-
  private:
   std::shared_ptr<InterpreterCore> GetInterpreterCore(
       Scope* scope,
       const ProgramDesc& prog,
       const std::vector<std::string>& feed_names,
-      const std::vector<std::string>& fetch_names,
-      bool add_fetch_op);
+      const std::vector<std::string>& fetch_names);

   platform::Place place_;
   const ProgramDesc& prog_;
...
...
@@ -1852,28 +1852,6 @@ All parameter, weight, gradient are variables in Paddle.
               ret = self.Run(scope, feed_names, fetch_names);
             }
             return py::cast(std::move(ret));
-           })
-      .def("dry_run",
-           [](StandaloneExecutor &self,
-              Scope *scope,
-              const std::unordered_map<std::string, py::array> &input_dict) {
-             std::vector<phi::DenseTensor> feed_tensors;
-             std::vector<std::string> feed_names;
-
-             for (auto &item : input_dict) {
-               phi::DenseTensor t;
-               SetTensorFromPyArray<platform::CPUPlace>(
-                   &t, item.second, platform::CPUPlace(), false);
-               feed_names.push_back(item.first);
-               feed_tensors.push_back(t);
-             }
-
-             framework::interpreter::CostInfo cost_info;
-             {
-               pybind11::gil_scoped_release release;
-               cost_info = self.DryRun(scope, feed_names, feed_tensors);
-             }
-             return cost_info;
           });

   m.def("init_gflags", framework::InitGflags);
...
...
@@ -174,7 +174,9 @@ TEST(InterpreterCore, skip_gc_vars) {
   Scope scope;

   std::shared_ptr<InterpreterCore> startup_core =
-      CreateInterpreterCore(place, startup_prog, &scope);
+      std::make_shared<InterpreterCore>(
+          place, startup_prog.Block(0), &scope, interpreter::ExecutionConfig());

   startup_core->Run({}, {});

   std::set<std::string> skip_gc_vars = {"uniform_0.tmp_0",
...
@@ -191,8 +193,9 @@ TEST(InterpreterCore, skip_gc_vars) {
   interpreter::ExecutionConfig execution_config;
   execution_config.skip_gc_vars = skip_gc_vars;

-  std::shared_ptr<InterpreterCore> main_core = CreateInterpreterCore(
-      place, main_prog, &scope, /*fetch_names=*/{}, execution_config);
+  std::shared_ptr<InterpreterCore> main_core =
+      std::make_shared<InterpreterCore>(
+          place, main_prog.Block(0), &scope, execution_config);

   auto check_gc_result =
       [](Scope& scope, std::set<std::string>& vars, bool is_skip_gc) {
...
@@ -225,10 +228,10 @@ void TestShareWorkQueue(const ProgramDesc& prog,
   const platform::CPUPlace place = platform::CPUPlace();
   Scope scope;

-  std::shared_ptr<InterpreterCore> core1 =
-      CreateInterpreterCore(place, prog, &scope, fetch_names);
-  std::shared_ptr<InterpreterCore> core2 =
-      CreateInterpreterCore(place, prog, &scope, fetch_names);
+  std::shared_ptr<InterpreterCore> core1 = std::make_shared<InterpreterCore>(
+      place, prog.Block(0), &scope, interpreter::ExecutionConfig());
+  std::shared_ptr<InterpreterCore> core2 = std::make_shared<InterpreterCore>(
+      place, prog.Block(0), &scope, interpreter::ExecutionConfig());
   core2->ShareWorkQueueFrom(core1);

   auto run_and_check = [&feed_names, &feed_tensors, &fetch_results](
...
...
@@ -17,64 +17,17 @@ import os
 os.environ['FLAGS_use_stream_safe_cuda_allocator'] = "true"
 import json
 import shutil
-import sys
 import unittest

 import numpy as np

 import paddle
 from paddle.fluid import core
-from paddle.fluid.core import StandaloneExecutor
 from paddle.profiler import profiler

 paddle.enable_static()
-
-class TestDryRun(unittest.TestCase):
-    def setUp(self):
-        place = (
-            paddle.CUDAPlace(0)
-            if core.is_compiled_with_cuda()
-            else paddle.CPUPlace()
-        )
-        self.place = core.Place()
-        self.place.set_place(place)
-
-    def build_program(self):
-        startup_program = paddle.static.Program()
-        main_program = paddle.static.Program()
-        with paddle.static.program_guard(main_program, startup_program):
-            a = paddle.static.data(name="a", shape=[2, 2], dtype='float32')
-            b = paddle.ones([2, 2]) * 2
-            t = paddle.static.nn.fc(a, 2)
-            c = t + b
-        return startup_program, main_program, c
-
-    def test_dry_run(self):
-        scope = core.Scope()
-        startup_program, main_program, c = self.build_program()
-        exe = paddle.static.Executor(self.place)
-        exe.run(startup_program, scope=scope)
-
-        standaloneexecutor = StandaloneExecutor(self.place, main_program.desc)
-        # test for cost_info
-        cost_info = standaloneexecutor.dry_run(
-            scope, {"a": np.ones([2, 2], dtype="float32")}
-        )
-        self.check_cost_info(cost_info)
-
-    def check_cost_info(self, cost_info):
-        IS_WINDOWS = sys.platform.startswith('win')
-        if core.is_compiled_with_cuda():
-            # w, bias, b, and out; a memory block is at least 256 bytes on Linux
-            gt = 16 * 4 if IS_WINDOWS else 256 * 4
-            self.assertGreater(cost_info.device_memory_bytes(), gt)
-        else:
-            self.assertEqual(cost_info.device_memory_bytes(), 0)
-
-
 def build_program():
     main_program = paddle.static.Program()
     startup_program = paddle.static.Program()
...