未验证 提交 c28beb8a 编写于 作者: Y Yu Yang 提交者: GitHub

test(Pe): add dry run tests for pe (#14254)

Dry-run tests skip `Op.Run` and only perform job scheduling, which helps to analyze deadlocks in PE.

test=develop
上级 80132933
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <cstddef> // for size_t
namespace paddle {
namespace framework {
......@@ -26,6 +27,7 @@ struct ExecutionStrategy {
bool allow_op_delay_{false};
size_t num_iteration_per_drop_scope_{100};
ExecutorType type_{kDefault};
bool dry_run_{false};
};
} // namespace details
......
......@@ -128,7 +128,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
size_t complete = 0;
while (op_to_run != nullptr) {
try {
if (LIKELY(!strategy_.dry_run_)) {
op_to_run->Run(strategy_.use_cuda_);
}
++complete;
} catch (...) {
exception_.Catch(std::current_exception());
......
......@@ -211,7 +211,9 @@ void ThreadedSSAGraphExecutor::RunOp(
if (VLOG_IS_ON(10)) {
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
}
if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_);
}
VLOG(10) << op << " " << op->Name() << " Done ";
running_ops_--;
ready_var_q->Extend(op->Outputs());
......
......@@ -48,7 +48,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
// Use topological sort algorithm
FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
~ThreadedSSAGraphExecutor() {}
~ThreadedSSAGraphExecutor() final = default;
private:
void RunOp(const std::shared_ptr<BlockingQueue<VarHandleBase *>> &ready_var_q,
......
......@@ -38,9 +38,20 @@ class ParallelExecutorPrivate {
// Constructs the private state, copying the given `places` into places_.
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
: places_(places) {}
~ParallelExecutorPrivate() {
  // Local scopes are only cleaned up here when own_local_scope_ is set.
  if (!own_local_scope_) {
    return;
  }
  // Index 0 is the global scope itself, so cleanup starts at index 1.
  for (size_t idx = 1; idx < local_scopes_.size(); ++idx) {
    Scope *child = local_scopes_[idx];
    // Only delete scopes that are still registered as kids of the global
    // scope.
    if (!global_scope_->HasKid(child)) {
      continue;
    }
    global_scope_->DeleteScope(child);
  }
}
std::vector<platform::Place> places_;
std::vector<Scope *> local_scopes_;
Scope *global_scope_;
Scope *global_scope_; // not owned
std::unique_ptr<details::SSAGraphExecutor> executor_;
#ifdef PADDLE_WITH_CUDA
......@@ -306,16 +317,6 @@ ParallelExecutor::~ParallelExecutor() {
for (auto &p : member_->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
if (member_->own_local_scope_) {
for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
Scope *local_scope = member_->local_scopes_[i];
if (member_->global_scope_->HasKid(local_scope)) {
member_->global_scope_->DeleteScope(local_scope);
}
}
}
// member_ must be destructed before gcs_ since the destructor of
// ReferenceCountOpHandle use raw pointers of gcs_ inside.
member_.reset();
......
......@@ -742,7 +742,12 @@ All parameter, weight, gradient are variables in Paddle.
will clean up the temp variables at the end of the current iteration.
2. In some NLP model, it may cause the GPU memory is insufficient,
in this case, you should reduce `num_iteration_per_drop_scope`.
)DOC");
)DOC")
.def_property("_dry_run",
[](const ExecutionStrategy &self) { return self.dry_run_; },
[](ExecutionStrategy &self, bool dry_run) {
self.dry_run_ = dry_run;
});
exec_strategy.def_property(
"use_experimental_executor",
......
......@@ -60,7 +60,7 @@ def data(name,
For example if shape=[1], the resulting shape is [-1, 1].
2. If shape contains -1, such as shape=[1, -1],
append_batch_size will be enforced to be False (ineffective).
dtype(int|float): The type of data : float32, float_16, int etc
dtype(basestring): The type of data : float32, float_16, int etc
type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
stop_gradient(bool): A boolean that mentions whether gradient should flow.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import unittest
import logging
import six
class TestBase(unittest.TestCase):
    """Shared driver for ParallelExecutor dry-run tests.

    Subclasses supply a network-building function and call :meth:`main`.
    """

    def main(self,
             network_func,
             iter=100,
             iter_per_pe=100,
             use_gpu=True,
             use_experimental_executor=False):
        """Build a network and repeatedly run it through dry-run executors.

        Args:
            network_func: Callable taking no arguments that builds the network
                in the current default program and returns the loss variable.
            iter: Number of ParallelExecutor instances to create, one after
                another.
            iter_per_pe: Number of ``run`` calls issued on each executor.
            use_gpu: Run on ``CUDAPlace(0)`` when True, ``CPUPlace`` otherwise.
                If True but Paddle was built without CUDA, a warning is logged
                and the method returns without running anything.
            use_experimental_executor: Value assigned to
                ``ExecutionStrategy.use_experimental_executor``.
        """
        if use_gpu and not fluid.core.is_compiled_with_cuda():
            logging.warning(
                "Paddle is not compiled with CUDA, skip GPU unittests")
            return

        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(main_prog, startup_prog):
            with fluid.scope_guard(scope):
                loss = network_func()
                place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
                fluid.Executor(place).run(startup_prog)

                for _ in six.moves.xrange(iter):
                    exe_strategy = fluid.ExecutionStrategy()
                    exe_strategy._dry_run = True
                    exe_strategy.use_experimental_executor = \
                        use_experimental_executor
                    # The executor must target the same device kind as the
                    # startup run; hard-coding use_cuda=True would make the
                    # CPU configuration request CUDA.
                    pe = fluid.ParallelExecutor(
                        use_cuda=use_gpu,
                        loss_name=loss.name,
                        main_program=main_prog,
                        exec_strategy=exe_strategy)
                    for _ in six.moves.xrange(iter_per_pe):
                        pe.run([])
class TestMNISTDryRun(TestBase):
    """Dry-run scheduling test using a small MNIST-style MLP."""

    def test_mnist_dry_run(self):
        """Run the dry-run driver for every device/executor combination."""
        for use_gpu in (False, True):
            for use_experimental_executor in (False, True):
                self.main(
                    network_func=TestMNISTDryRun.network_func,
                    use_gpu=use_gpu,
                    use_experimental_executor=use_experimental_executor)

    @staticmethod
    def network_func():
        """Build a 10-hidden-layer MLP classifier and return its mean loss.

        Declares ``img`` (shape [784], float32) and ``label`` (shape [1],
        int64) data layers, stacks ten tanh FC layers of size 200, adds a
        softmax FC layer of size 10, and minimizes the mean cross-entropy
        with Adam.
        """
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        hidden = img
        for _ in six.moves.xrange(10):
            # Feed the previous layer's output forward so the layers are
            # actually stacked instead of each one reading `img` directly.
            hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)
        fluid.optimizer.Adam().minimize(avg_loss)
        return avg_loss
# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册