Unverified commit 5c0b60ae authored by Ruibiao Chen, committed by GitHub

Remove DryRun in standalone executor (#54222)

Parent ea8f1998
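This change drops the DryRun/CostInfo path from InterpreterCore, StandaloneExecutor, and the Python `dry_run` binding, along with the file-local `CreateInterpreterCore` helper; callers now construct an `InterpreterCore` directly from a block, as the updated tests below do. A minimal sketch of the replacement pattern (the include paths and the helper name `RunOnce` are assumptions of this sketch, not part of the diff):

```cpp
// Minimal sketch of the post-removal pattern, mirroring the updated unit
// tests. Include paths and the name RunOnce are assumptions, not part of
// this diff.
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace fw = paddle::framework;

fw::FetchList RunOnce(const paddle::platform::Place& place,
                      const fw::ProgramDesc& prog,
                      fw::Scope* scope,
                      const std::vector<std::string>& feed_names,
                      const std::vector<phi::DenseTensor>& feed_tensors) {
  // Build the core directly from block 0; DryRun/CostInfo and the
  // CreateInterpreterCore helper are no longer available.
  auto core = std::make_shared<fw::InterpreterCore>(
      place, prog.Block(0), scope, fw::interpreter::ExecutionConfig());
  return core->Run(feed_names, feed_tensors);
}
```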
@@ -158,38 +158,6 @@ InterpreterCore::~InterpreterCore() {
#endif
}
interpreter::CostInfo InterpreterCore::DryRun(
const std::vector<std::string>& feed_names,
const std::vector<phi::DenseTensor>& feed_tensors) {
SetDeviceId(place_);
CheckCUDAGraphBeforeRun(feed_names);
Prepare(feed_names, feed_tensors, true);
interpreter::CostInfo cost_info;
{
interpreter::ProfilerGuard(place_, &cost_info);
// For a program that only runs once, there is no need to
// create a work_queue, so the async_work_queue_ is not created
// until the second run.
async_work_queue_ = GetWorkQueue();
// lazy initialization of gc; do not create gc if the program only runs once
if (!gc_) {
gc_ = CreateInterpreterCoreGarbageCollector(place_, vec_instruction_);
}
ExecuteInstructionList(vec_instruction_);
platform::DeviceContextPool::Instance().Get(place_)->Wait();
}
if (HasLocalScope()) {
ClearLoDTensorArrayInLocalScope();
}
return cost_info;
}
void InterpreterCore::RunImpl() {
// lazy initialization of gc; do not create gc if the program only runs once
if (!gc_) {
@@ -1540,24 +1508,5 @@ void InterpreterCore::AnalyseExecuteOrderForTrace() {
trace_execute_order_ = trace_order;
}
std::shared_ptr<InterpreterCore> CreateInterpreterCore(
const platform::Place& place,
const ProgramDesc& prog,
Scope* scope,
const std::vector<std::string>& fetch_names,
const interpreter::ExecutionConfig& execution_config) {
std::shared_ptr<InterpreterCore> core = nullptr;
// NOTE(Aurelius84): `AddFetch` will modify the BlockDesc, so we should
// make a copy of the program first.
auto new_prog = std::make_shared<framework::ProgramDesc>(prog);
auto* block = new_prog->MutableBlock(0);
interpreter::AddFetch(fetch_names, block);
core =
std::make_shared<InterpreterCore>(place, *block, scope, execution_config);
core->SetCopyProgram(new_prog);
return core;
}
} // namespace framework
} // namespace paddle
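The deleted `CreateInterpreterCore` helper also took care of fetch targets: because `interpreter::AddFetch` mutates the BlockDesc, it appended fetch ops to a copy of the program and kept that copy alive via `SetCopyProgram`. A call site that still needs fetch ops can inline the same steps; a sketch along the lines of the deleted helper (include paths and the name `BuildCoreWithFetch` are assumptions):

```cpp
// Sketch of inlining the deleted CreateInterpreterCore helper at a call site
// that still needs fetch ops. Include paths and the function name are
// assumptions of this sketch.
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace fw = paddle::framework;

std::shared_ptr<fw::InterpreterCore> BuildCoreWithFetch(
    const paddle::platform::Place& place,
    const fw::ProgramDesc& prog,
    fw::Scope* scope,
    const std::vector<std::string>& fetch_names) {
  // AddFetch mutates the BlockDesc, so append fetch ops to a copy of the
  // program rather than to the caller's ProgramDesc.
  auto copy_prog = std::make_shared<fw::ProgramDesc>(prog);
  auto* block = copy_prog->MutableBlock(0);
  fw::interpreter::AddFetch(fetch_names, block);

  auto core = std::make_shared<fw::InterpreterCore>(
      place, *block, scope, fw::interpreter::ExecutionConfig());
  // Keep the copied program alive for as long as the core may use it.
  core->SetCopyProgram(copy_prog);
  return core;
}
```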
@@ -54,10 +54,6 @@ class InterpreterCore {
~InterpreterCore();
interpreter::CostInfo DryRun(
const std::vector<std::string>& feed_names,
const std::vector<phi::DenseTensor>& feed_tensors);
paddle::framework::FetchList Run(
const std::vector<std::string>& feed_names,
const std::vector<phi::DenseTensor>& feed_tensors);
@@ -190,13 +186,5 @@ class InterpreterCore {
InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
};
std::shared_ptr<InterpreterCore> CreateInterpreterCore(
const platform::Place& place,
const ProgramDesc& prog,
Scope* scope,
const std::vector<std::string>& fetch_names = {},
const interpreter::ExecutionConfig& execution_config =
interpreter::ExecutionConfig());
} // namespace framework
} // namespace paddle
@@ -28,27 +28,17 @@ paddle::framework::FetchList StandaloneExecutor::Run(
const std::vector<std::string>& fetch_names) {
platform::RecordEvent record_event(
"StandaloneExecutor::run", platform::TracerEventType::UserDefined, 1);
auto core = GetInterpreterCore(scope, prog_, feed_names, fetch_names, false);
auto core = GetInterpreterCore(scope, prog_, feed_names, fetch_names);
VLOG(4) << "StandaloneExecutor: " << this << ", InterpreterCore: " << core;
return core->Run(feed_names);
}
framework::interpreter::CostInfo StandaloneExecutor::DryRun(
Scope* scope,
const std::vector<std::string>& feed_names,
const std::vector<phi::DenseTensor>& feed_tensors) {
auto core = GetInterpreterCore(scope, prog_, feed_names, {}, true);
return core->DryRun(feed_names, feed_tensors);
}
std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
Scope* scope,
const ProgramDesc& prog,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names,
bool add_fetch_op) {
const std::vector<std::string>& fetch_names) {
std::ostringstream oss;
oss << "feed:";
for (auto& feedname : feed_names) {
@@ -65,14 +55,8 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
if (iter == interpretercores_.end()) {
VLOG(3) << "create interpreter_core for " << oss.str() << " on place "
<< place_;
VLOG(3) << "add fetch op: " << add_fetch_op;
std::shared_ptr<InterpreterCore> core = nullptr;
if (add_fetch_op) {
core = CreateInterpreterCore(place_, prog, scope, fetch_names);
} else {
core = std::make_shared<InterpreterCore>(place_, prog.Block(0), scope);
}
std::shared_ptr<InterpreterCore> core =
std::make_shared<InterpreterCore>(place_, prog.Block(0), scope);
interpretercores_.emplace(oss.str(), core);
return core;
} else {
......
@@ -42,18 +42,12 @@ class StandaloneExecutor {
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names);
framework::interpreter::CostInfo DryRun(
Scope* scope,
const std::vector<std::string>& feed_names,
const std::vector<phi::DenseTensor>& feed_tensors);
private:
std::shared_ptr<InterpreterCore> GetInterpreterCore(
Scope* scope,
const ProgramDesc& prog,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names,
bool add_fetch_op);
const std::vector<std::string>& fetch_names);
platform::Place place_;
const ProgramDesc& prog_;
......
@@ -1852,28 +1852,6 @@ All parameter, weight, gradient are variables in Paddle.
ret = self.Run(scope, feed_names, fetch_names);
}
return py::cast(std::move(ret));
})
.def("dry_run",
[](StandaloneExecutor &self,
Scope *scope,
const std::unordered_map<std::string, py::array> &input_dict) {
std::vector<phi::DenseTensor> feed_tensors;
std::vector<std::string> feed_names;
for (auto &item : input_dict) {
phi::DenseTensor t;
SetTensorFromPyArray<platform::CPUPlace>(
&t, item.second, platform::CPUPlace(), false);
feed_names.push_back(item.first);
feed_tensors.push_back(t);
}
framework::interpreter::CostInfo cost_info;
{
pybind11::gil_scoped_release release;
cost_info = self.DryRun(scope, feed_names, feed_tensors);
}
return cost_info;
});
m.def("init_gflags", framework::InitGflags);
......
@@ -174,7 +174,9 @@ TEST(InterpreterCore, skip_gc_vars) {
Scope scope;
std::shared_ptr<InterpreterCore> startup_core =
CreateInterpreterCore(place, startup_prog, &scope);
std::make_shared<InterpreterCore>(
place, startup_prog.Block(0), &scope, interpreter::ExecutionConfig());
startup_core->Run({}, {});
std::set<std::string> skip_gc_vars = {"uniform_0.tmp_0",
@@ -191,8 +193,9 @@ TEST(InterpreterCore, skip_gc_vars) {
interpreter::ExecutionConfig execution_config;
execution_config.skip_gc_vars = skip_gc_vars;
std::shared_ptr<InterpreterCore> main_core = CreateInterpreterCore(
place, main_prog, &scope, /*fetch_names=*/{}, execution_config);
std::shared_ptr<InterpreterCore> main_core =
std::make_shared<InterpreterCore>(
place, main_prog.Block(0), &scope, execution_config);
auto check_gc_result =
[](Scope& scope, std::set<std::string>& vars, bool is_skip_gc) {
@@ -225,10 +228,10 @@ void TestShareWorkQueue(const ProgramDesc& prog,
const platform::CPUPlace place = platform::CPUPlace();
Scope scope;
std::shared_ptr<InterpreterCore> core1 =
CreateInterpreterCore(place, prog, &scope, fetch_names);
std::shared_ptr<InterpreterCore> core2 =
CreateInterpreterCore(place, prog, &scope, fetch_names);
std::shared_ptr<InterpreterCore> core1 = std::make_shared<InterpreterCore>(
place, prog.Block(0), &scope, interpreter::ExecutionConfig());
std::shared_ptr<InterpreterCore> core2 = std::make_shared<InterpreterCore>(
place, prog.Block(0), &scope, interpreter::ExecutionConfig());
core2->ShareWorkQueueFrom(core1);
auto run_and_check = [&feed_names, &feed_tensors, &fetch_results](
......
@@ -17,64 +17,17 @@ import os
os.environ['FLAGS_use_stream_safe_cuda_allocator'] = "true"
import json
import shutil
import sys
import unittest
import numpy as np
import paddle
from paddle.fluid import core
from paddle.fluid.core import StandaloneExecutor
from paddle.profiler import profiler
paddle.enable_static()
class TestDryRun(unittest.TestCase):
def setUp(self):
place = (
paddle.CUDAPlace(0)
if core.is_compiled_with_cuda()
else paddle.CPUPlace()
)
self.place = core.Place()
self.place.set_place(place)
def build_program(self):
startup_program = paddle.static.Program()
main_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
a = paddle.static.data(name="a", shape=[2, 2], dtype='float32')
b = paddle.ones([2, 2]) * 2
t = paddle.static.nn.fc(a, 2)
c = t + b
return startup_program, main_program, c
def test_dry_run(self):
scope = core.Scope()
startup_program, main_program, c = self.build_program()
exe = paddle.static.Executor(self.place)
exe.run(startup_program, scope=scope)
standaloneexecutor = StandaloneExecutor(self.place, main_program.desc)
# test for cost_info
cost_info = standaloneexecutor.dry_run(
scope, {"a": np.ones([2, 2], dtype="float32")}
)
self.check_cost_info(cost_info)
def check_cost_info(self, cost_info):
IS_WINDOWS = sys.platform.startswith('win')
if core.is_compiled_with_cuda():
# w, bias, b, out: each memory block is at least 256 bytes on Linux
gt = 16 * 4 if IS_WINDOWS else 256 * 4
self.assertGreater(cost_info.device_memory_bytes(), gt)
else:
self.assertEqual(cost_info.device_memory_bytes(), 0)
def build_program():
main_program = paddle.static.Program()
startup_program = paddle.static.Program()
......