Unverified · Commit 2d036c47, authored by Wu Yi, committed by GitHub

polish dist unit test code (#12512)

* polish dist SE-ResNeXt unit test

* update

* update

* update

* avoid CPU initializer differing from GPU

* change to use executor for now

* update by comment

* remove LR decay under ParallelExecutor; should fix the ParallelExecutor bug later

* update by comment
Parent: 97a77512
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/platform/profiler.h"

 namespace paddle {
 namespace operators {
@@ -65,6 +66,12 @@ class ReadOp : public framework::OperatorBase {
                        .GetMutable<framework::ReaderHolder>();
     std::vector<std::string> out_arg_names = Outputs("Out");
     std::vector<framework::LoDTensor> ins;
+
+    // For profiling
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto& ctx = *pool.Get(dev_place);
+    platform::RecordEvent record_event(Type(), &ctx);
+
     reader->ReadNext(&ins);
     if (ins.empty()) {
       if (Attr<bool>("throw_eof_exp")) {
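With this RAII guard in scope for the rest of RunImpl, the time spent in reader->ReadNext() is attributed to the read op in profiler reports. A minimal Python-side sketch of collecting such a report; the programs being run are illustrative, only the profiler context manager is the point:

import paddle.fluid as fluid
import paddle.fluid.profiler as profiler

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# With the RecordEvent added above, time spent inside ReadOp
# (reader->ReadNext) shows up as its own row in the per-op table.
with profiler.profiler('All', sorted_key='total'):
    for _ in range(10):
        exe.run(fluid.default_main_program())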
@@ -174,6 +174,9 @@ class SE_ResNeXt():
             padding=(filter_size - 1) / 2,
             groups=groups,
             act=None,
+            # avoid pserver CPU init differs from GPU
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.Constant()),
             bias_attr=False)
         return fluid.layers.batch_norm(input=conv, act=act)
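The constant initializer is what makes the comparison reproducible: a random initializer run on the pserver's CPU can yield different values than the same initializer run on a GPU trainer, while Constant() is device-independent. A self-contained sketch of the same idea, with an illustrative layer shape and filter count:

import paddle.fluid as fluid

image = fluid.layers.data(name="data", shape=[3, 224, 224], dtype='float32')
# Constant() produces bit-identical weights regardless of device or RNG;
# random initializers can differ between the CPU pserver and GPU trainer
# startup programs.
conv = fluid.layers.conv2d(
    input=image,
    num_filters=64,
    filter_size=3,
    padding=1,
    act=None,
    param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant()),
    bias_attr=False)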
@@ -194,10 +197,8 @@ class SE_ResNeXt():

 def get_model(batch_size):
     # Input data
-    image = fluid.layers.fill_constant(
-        shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0)
-    label = fluid.layers.fill_constant(
-        shape=[batch_size, 1], dtype='int64', value=0.0)
+    image = fluid.layers.data(name="data", shape=[3, 224, 224], dtype='float32')
+    label = fluid.layers.data(name="int64", shape=[1], dtype='int64')

     # Train program
     model = SE_ResNeXt(layers=50)
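Real input layers replace the all-zero fill_constant placeholders, so the test now exercises the reader and feed path with actual flowers data. The head that get_model builds around these inputs looks roughly like the sketch below, with a toy fc layer standing in for SE_ResNeXt (102 is the flowers class count):

import paddle.fluid as fluid

image = fluid.layers.data(name="data", shape=[3, 224, 224], dtype='float32')
label = fluid.layers.data(name="int64", shape=[1], dtype='int64')

# Toy stand-in for SE_ResNeXt: any image -> probability network works here.
out = fluid.layers.fc(input=image, size=102, act='softmax')
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)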
@@ -222,8 +223,10 @@ def get_model(batch_size):
     lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

     optimizer = fluid.optimizer.Momentum(
-        learning_rate=fluid.layers.piecewise_decay(
-            boundaries=bd, values=lr),
+        # FIXME(typhoonzero): add back LR decay once ParallelExecutor fixed.
+        #learning_rate=fluid.layers.piecewise_decay(
+        #    boundaries=bd, values=lr),
+        learning_rate=base_lr,
         momentum=0.9,
         regularization=fluid.regularizer.L2Decay(1e-4))
     optimizer.minimize(avg_cost)
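For reference, the disabled schedule drops the learning rate by 10x at each boundary; once the ParallelExecutor issue is resolved it can be restored along these lines (the boundary steps are illustrative):

import paddle.fluid as fluid

base_lr = 0.1
bd = [1000, 2000, 3000]  # step boundaries
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]  # [0.1, 0.01, 0.001, 0.0001]

optimizer = fluid.optimizer.Momentum(
    # lr is 0.1 for steps [0, 1000), 0.01 for [1000, 2000), and so on.
    learning_rate=fluid.layers.piecewise_decay(boundaries=bd, values=lr),
    momentum=0.9,
    regularization=fluid.regularizer.L2Decay(1e-4))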
@@ -232,7 +235,7 @@ def get_model(batch_size):
     train_reader = paddle.batch(
         paddle.dataset.flowers.train(), batch_size=batch_size)
     test_reader = paddle.batch(
-        paddle.dataset.flowers.test(), batch_size=batch_size)
+        paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
     return test_program, avg_cost, train_reader, test_reader, acc_top1, out
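use_xmap=False trades the threaded xmap decoding pipeline for in-process, in-order reading, so consecutive runs see identical batches, which the loss comparison below depends on. A quick sketch:

import paddle

# In-order, single-process decoding: every run yields the same batches.
# The default xmap pipeline decodes in worker threads and may reorder
# samples between runs.
test_reader = paddle.batch(
    paddle.dataset.flowers.test(use_xmap=False), batch_size=4)
first_batch = next(test_reader())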
@@ -256,7 +259,6 @@ class DistSeResneXt2x2:
                                  trainers)
         pserver_prog = t.get_pserver_program(current_endpoint)
         startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
-
         place = fluid.CPUPlace()
         exe = fluid.Executor(place)
         exe.run(startup_prog)
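For context, the pserver-side flow the test follows is sketched below; the endpoints and worker counts are hypothetical, and a main program with an optimizer is assumed to have been built first:

import paddle.fluid as fluid

# Hypothetical single-machine endpoints for illustration.
trainer_id = 0
trainers = 2
pserver_endpoints = "127.0.0.1:6174"
current_endpoint = "127.0.0.1:6174"

t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
pserver_prog = t.get_pserver_program(current_endpoint)
startup_prog = t.get_startup_program(current_endpoint, pserver_prog)

exe = fluid.Executor(fluid.CPUPlace())  # pservers run on CPU
exe.run(startup_prog)
exe.run(pserver_prog)  # blocks, serving parameter updates over RPC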
@@ -302,12 +304,19 @@ class DistSeResneXt2x2:
         ]

         feeder = fluid.DataFeeder(feed_var_list, place)
-        reader_generator = train_reader()
-        first_loss, = exe.run(fetch_list=[avg_cost.name])
+        reader_generator = test_reader()
+
+        data = next(reader_generator)
+        first_loss, = exe.run(fetch_list=[avg_cost.name],
+                              feed=feeder.feed(data))
         print(first_loss)
+
         for i in xrange(5):
-            loss, = exe.run(fetch_list=[avg_cost.name])
-        last_loss, = exe.run(fetch_list=[avg_cost.name])
+            data = next(reader_generator)
+            loss, = exe.run(fetch_list=[avg_cost.name], feed=feeder.feed(data))
+
+        data = next(reader_generator)
+        last_loss, = exe.run(fetch_list=[avg_cost.name], feed=feeder.feed(data))
         print(last_loss)
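Every exe.run now consumes an explicit batch from the same deterministic reader, so local and distributed runs can be compared step for step. Reusing the names from the hunk above, the loop is equivalent to collecting a seven-entry loss trace:

# 1 first + 5 intermediate + 1 last loss, from identical batches each run.
losses = []
reader_generator = test_reader()
for _ in range(7):
    data = next(reader_generator)
    loss, = exe.run(fetch_list=[avg_cost.name], feed=feeder.feed(data))
    losses.append(loss)
print(losses[0], losses[-1])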
@@ -63,7 +63,8 @@ class TestDistBase(unittest.TestCase):
             "PATH": os.getenv("PATH"),
             "PYTHONPATH": os.getenv("PYTHONPATH"),
             "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"),
-            "FLAGS_fraction_of_gpu_memory_to_use": "0.15"
+            "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
+            "FLAGS_cudnn_deterministic": "1"
         }
         # Run local to get a base line
         env_local = {"CUDA_VISIBLE_DEVICES": "0"}
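FLAGS_cudnn_deterministic=1 forces cuDNN to choose deterministic algorithms, removing the run-to-run noise that previously required a loose comparison delta. A hedged sketch of how such an environment could be handed to a worker process; the real launch logic presumably lives in TestDistBase.check_with_place:

import os
import subprocess
import sys

required_envs = {
    "PATH": os.getenv("PATH", ""),
    "PYTHONPATH": os.getenv("PYTHONPATH", ""),
    "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
    "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
    # Deterministic cuDNN algorithms: identical losses across runs.
    "FLAGS_cudnn_deterministic": "1",
}
env_local = dict(required_envs, CUDA_VISIBLE_DEVICES="0")
subprocess.Popen([sys.executable, "dist_se_resnext.py"], env=env_local)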
@@ -17,8 +17,7 @@ from test_dist_base import TestDistBase

 class TestDistSeResneXt2x2(TestDistBase):
     def test_se_resnext(self):
-        # TODO(paddle-dev): Is the delta too large?
-        self.check_with_place("dist_se_resnext.py", delta=0.2)
+        self.check_with_place("dist_se_resnext.py")

 if __name__ == "__main__":
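With deterministic cuDNN and identical input batches, local and distributed losses should agree tightly, so the delta=0.2 escape hatch and its TODO can go. The comparison check_with_place performs is presumably of this shape; its internals and default tolerance are assumptions, not part of this patch:

import unittest

class LossCompareSketch(unittest.TestCase):
    def compare_losses(self, local_losses, dist_losses, delta=1e-5):
        # With FLAGS_cudnn_deterministic=1 and identical batches, the
        # two runs should match to a tight default tolerance.
        for local, dist in zip(local_losses, dist_losses):
            self.assertAlmostEqual(local, dist, delta=delta)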