diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc
index 65fcce8bb019965a805ad09d50be0aba64e4f24e..a0d640b2020958af53a4405ae886eadb2a1e117e 100644
--- a/paddle/fluid/operators/read_op.cc
+++ b/paddle/fluid/operators/read_op.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/platform/profiler.h"
 
 namespace paddle {
 namespace operators {
@@ -65,6 +66,12 @@ class ReadOp : public framework::OperatorBase {
             .GetMutable<framework::ReaderHolder>();
     std::vector<std::string> out_arg_names = Outputs("Out");
     std::vector<framework::LoDTensor> ins;
+
+    // For profiling
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto& ctx = *pool.Get(dev_place);
+    platform::RecordEvent record_event(Type(), &ctx);
+
     reader->ReadNext(&ins);
     if (ins.empty()) {
       if (Attr<bool>("throw_eof_exp")) {
diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
index bf7816b2466edd7db836c738da90f5f97b631843..f1f35d96f67ad5ef79ec9cb20f070a8352f0e97e 100644
--- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
@@ -174,6 +174,9 @@ class SE_ResNeXt():
             padding=(filter_size - 1) / 2,
             groups=groups,
             act=None,
+            # avoid pserver CPU init differs from GPU
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.Constant()),
             bias_attr=False)
         return fluid.layers.batch_norm(input=conv, act=act)
 
@@ -194,10 +197,8 @@ class SE_ResNeXt():
 
 def get_model(batch_size):
     # Input data
-    image = fluid.layers.fill_constant(
-        shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0)
-    label = fluid.layers.fill_constant(
-        shape=[batch_size, 1], dtype='int64', value=0.0)
+    image = fluid.layers.data(name="data", shape=[3, 224, 224], dtype='float32')
+    label = fluid.layers.data(name="int64", shape=[1], dtype='int64')
 
     # Train program
     model = SE_ResNeXt(layers=50)
@@ -222,8 +223,10 @@ def get_model(batch_size):
     lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
 
     optimizer = fluid.optimizer.Momentum(
-        learning_rate=fluid.layers.piecewise_decay(
-            boundaries=bd, values=lr),
+        # FIXME(typhoonzero): add back LR decay once ParallelExecutor fixed.
+        #learning_rate=fluid.layers.piecewise_decay(
+        #    boundaries=bd, values=lr),
+        learning_rate=base_lr,
         momentum=0.9,
         regularization=fluid.regularizer.L2Decay(1e-4))
     optimizer.minimize(avg_cost)
@@ -232,7 +235,7 @@ def get_model(batch_size):
     train_reader = paddle.batch(
         paddle.dataset.flowers.train(), batch_size=batch_size)
     test_reader = paddle.batch(
-        paddle.dataset.flowers.test(), batch_size=batch_size)
+        paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
 
     return test_program, avg_cost, train_reader, test_reader, acc_top1, out
 
@@ -256,7 +259,6 @@ class DistSeResneXt2x2:
                            trainers)
         pserver_prog = t.get_pserver_program(current_endpoint)
         startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
-        place = fluid.CPUPlace()
         exe = fluid.Executor(place)
         exe.run(startup_prog)
         exe.run(pserver_prog)
@@ -302,12 +304,19 @@ class DistSeResneXt2x2:
         ]
 
         feeder = fluid.DataFeeder(feed_var_list, place)
-        reader_generator = train_reader()
-        first_loss, = exe.run(fetch_list=[avg_cost.name])
+        reader_generator = test_reader()
+
+        data = next(reader_generator)
+        first_loss, = exe.run(fetch_list=[avg_cost.name],
+                              feed=feeder.feed(data))
         print(first_loss)
+
         for i in xrange(5):
-            loss, = exe.run(fetch_list=[avg_cost.name])
-        last_loss, = exe.run(fetch_list=[avg_cost.name])
+            data = next(reader_generator)
+            loss, = exe.run(fetch_list=[avg_cost.name], feed=feeder.feed(data))
+
+        data = next(reader_generator)
+        last_loss, = exe.run(fetch_list=[avg_cost.name], feed=feeder.feed(data))
         print(last_loss)
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index 1aaab6f906ef6482bc515bb3c42d82431902e1d8..58cfd4e1fd958d8d59e49c87fbbabd0182975add 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -63,7 +63,8 @@ class TestDistBase(unittest.TestCase):
             "PATH": os.getenv("PATH"),
             "PYTHONPATH": os.getenv("PYTHONPATH"),
             "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH"),
-            "FLAGS_fraction_of_gpu_memory_to_use": "0.15"
+            "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
+            "FLAGS_cudnn_deterministic": "1"
         }
         # Run local to get a base line
         env_local = {"CUDA_VISIBLE_DEVICES": "0"}
diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
index 04671d079731ce414561b0ede6bc2b195b07d82a..f3a5fd6985bab1d04f6e1484534367548f383dfb 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
@@ -17,8 +17,7 @@ from test_dist_base import TestDistBase
 
 class TestDistSeResneXt2x2(TestDistBase):
     def test_se_resnext(self):
-        # TODO(paddle-dev): Is the delta too large?
-        self.check_with_place("dist_se_resnext.py", delta=0.2)
+        self.check_with_place("dist_se_resnext.py")
 
 
 if __name__ == "__main__":
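Editor's note (not part of the diff): the trainer change above replaces fill_constant placeholder inputs with real fluid.layers.data inputs fed through a DataFeeder. Below is a minimal sketch of that feed pattern, assuming the same Fluid-era API the tests use; the small fc network stands in for SE-ResNeXt, and the loop count is arbitrary.

    import paddle
    import paddle.fluid as fluid

    # Placeholder inputs that receive real batches at run time
    # (flowers has 102 classes; the label name mirrors the test above).
    image = fluid.layers.data(name="data", shape=[3, 224, 224], dtype='float32')
    label = fluid.layers.data(name="int64", shape=[1], dtype='int64')
    fc = fluid.layers.fc(input=image, size=102, act='softmax')  # stand-in model
    cost = fluid.layers.cross_entropy(input=fc, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # use_xmap=False avoids the multiprocess prefetch reader, as in the diff.
    reader = paddle.batch(
        paddle.dataset.flowers.test(use_xmap=False), batch_size=2)
    feeder = fluid.DataFeeder(feed_list=[image, label], place=place)

    for data in reader():
        # feeder.feed(data) converts the Python batch into the feed dict
        # of LoDTensors that Executor.run expects.
        loss, = exe.run(fluid.default_main_program(),
                        feed=feeder.feed(data),
                        fetch_list=[avg_cost.name])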