提交 02dab46a 编写于 作者: Q Qiao Longfei

add some debug info

上级 7e145b7c
...@@ -84,6 +84,8 @@ FeedFetchList AsyncSSAGraphExecutor::Run( ...@@ -84,6 +84,8 @@ FeedFetchList AsyncSSAGraphExecutor::Run(
} }
if (exception_holder_.IsCaught()) { if (exception_holder_.IsCaught()) {
VLOG(3) << "caught exception " << exception_holder_.Type()
<< ", rethrow it";
exception_holder_.ReThrow(); exception_holder_.ReThrow();
} }
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#pragma once #pragma once
#include <string>
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -64,6 +66,21 @@ class ExceptionHolder { ...@@ -64,6 +66,21 @@ class ExceptionHolder {
ClearImpl(); ClearImpl();
} }
// Returns a human-readable name for the exception kind currently held.
// Thread-safe: takes mu_ so the result is consistent with concurrent
// Catch/Clear calls. Falls back to "unknown" for any kind not listed
// (e.g. values added to the enum later).
std::string Type() {
  std::lock_guard<std::mutex> lock(mu_);
  if (type_ == kNone) {
    return "None";
  }
  if (type_ == kEnforceNotMet) {
    return "EnforceNotMet";
  }
  if (type_ == kEOF) {
    return "EOF";
  }
  return "unknown";
}
private: private:
void ClearImpl() { void ClearImpl() {
exception_.reset(); exception_.reset();
......
...@@ -79,6 +79,7 @@ class BlockingQueue { ...@@ -79,6 +79,7 @@ class BlockingQueue {
return true; return true;
} else { } else {
PADDLE_ENFORCE(closed_); PADDLE_ENFORCE(closed_);
VLOG(3) << "queue is closed! return nothing.";
return false; return false;
} }
} }
......
...@@ -59,6 +59,13 @@ def train(use_cuda, thread_num, cpu_num): ...@@ -59,6 +59,13 @@ def train(use_cuda, thread_num, cpu_num):
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
py_reader = fluid.layers.create_py_reader_by_data(
capacity=64,
feed_list=[img, label],
name='py_reader',
use_double_buffer=True)
img, label = fluid.layers.read_file(py_reader)
prediction, avg_loss, acc = convolutional_neural_network(img, label) prediction, avg_loss, acc = convolutional_neural_network(img, label)
test_program = fluid.default_main_program().clone(for_test=True) test_program = fluid.default_main_program().clone(for_test=True)
...@@ -103,7 +110,7 @@ def train(use_cuda, thread_num, cpu_num): ...@@ -103,7 +110,7 @@ def train(use_cuda, thread_num, cpu_num):
exec_strategy = fluid.ExecutionStrategy() exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = thread_num exec_strategy.num_threads = thread_num
exec_strategy.num_iteration_per_run = 2 exec_strategy.num_iteration_per_run = 1
main_program = fluid.default_main_program() main_program = fluid.default_main_program()
pe = fluid.ParallelExecutor( pe = fluid.ParallelExecutor(
...@@ -113,6 +120,22 @@ def train(use_cuda, thread_num, cpu_num): ...@@ -113,6 +120,22 @@ def train(use_cuda, thread_num, cpu_num):
build_strategy=build_strategy, build_strategy=build_strategy,
exec_strategy=exec_strategy) exec_strategy=exec_strategy)
py_reader.decorate_paddle_reader(train_reader)
py_reader.start()
step = 0
try:
while True:
print("step %d in" % step)
loss_val = pe.run(fetch_list=[avg_loss.name])
loss_val = numpy.mean(loss_val)
if step % 1 == 0:
print("Batch %d, Cost %f, queue size %d" %
(step, loss_val, py_reader.queue.size()))
step += 1
except fluid.core.EOFException:
py_reader.reset()
"""
step = 0 step = 0
for step_id, data in enumerate(train_reader()): for step_id, data in enumerate(train_reader()):
loss_val = pe.run(feed=feeder.feed(data), fetch_list=[avg_loss.name]) loss_val = pe.run(feed=feeder.feed(data), fetch_list=[avg_loss.name])
...@@ -120,6 +143,8 @@ def train(use_cuda, thread_num, cpu_num): ...@@ -120,6 +143,8 @@ def train(use_cuda, thread_num, cpu_num):
if step % 100 == 0: if step % 100 == 0:
print("Batch %d, Cost %f" % (step, loss_val)) print("Batch %d, Cost %f" % (step, loss_val))
step += 1 step += 1
"""
# test for epoch # test for epoch
avg_loss_val, acc_val = train_test( avg_loss_val, acc_val = train_test(
train_test_program=test_program, train_test_program=test_program,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册