From b8f17a049d6b5b19dc74e78c7305d715d3679539 Mon Sep 17 00:00:00 2001
From: hutuxian
Date: Thu, 4 Jun 2020 15:14:12 +0800
Subject: [PATCH] fix problem in dump and add log (#24891)

* Fix the field length in the LoD scenario
* Fix the missing LoD info when copying the tensor in a dump field
* Add some logging to make debugging easier
---
 paddle/fluid/framework/device_worker.cc         | 17 +++++++++++++----
 paddle/fluid/framework/fleet/box_wrapper.h      |  4 ++--
 .../paddle/fluid/tests/unittests/test_boxps.py  | 12 +++++++++---
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc
index 5cfe664203..f7e64b4f65 100644
--- a/paddle/fluid/framework/device_worker.cc
+++ b/paddle/fluid/framework/device_worker.cc
@@ -163,26 +163,35 @@ void DeviceWorker::DumpField(const Scope& scope, int dump_mode,
   for (auto& field : *dump_fields_) {
     Variable* var = scope.FindVar(field);
     if (var == nullptr) {
+      VLOG(0) << "Note: field[" << field
+              << "] cannot be found in scope, so it was skipped.";
       continue;
     }
     LoDTensor* tensor = var->GetMutable<LoDTensor>();
+    if (!tensor->IsInitialized()) {
+      VLOG(0) << "Note: field[" << field
+              << "] is not initialized, so it was skipped.";
+      continue;
+    }
     framework::LoDTensor cpu_tensor;
     if (platform::is_gpu_place(tensor->place())) {
       TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
+      cpu_tensor.set_lod(tensor->lod());
       tensor = &cpu_tensor;
     }
     if (!CheckValidOutput(tensor, batch_size)) {
+      VLOG(0) << "Note: field[" << field << "] cannot pass check, so it was "
+                 "skipped. Maybe the dimension is "
+                 "wrong.";
       continue;
     }
     for (size_t i = 0; i < batch_size; ++i) {
       if (!hit[i]) {
         continue;
       }
-      auto output_dim = tensor->dims()[1];
-      std::string output_dimstr = boost::lexical_cast<std::string>(output_dim);
-      ars[i] = ars[i] + "\t" + field + ":" + output_dimstr;
       auto bound = GetTensorBound(tensor, i);
-
+      ars[i] = ars[i] + "\t" + field + ":" +
+               std::to_string(bound.second - bound.first);
       ars[i] += PrintLodTensor(tensor, bound.first, bound.second);
     }
   }
diff --git a/paddle/fluid/framework/fleet/box_wrapper.h b/paddle/fluid/framework/fleet/box_wrapper.h
index 37fd35b4f0..27eb0d68eb 100644
--- a/paddle/fluid/framework/fleet/box_wrapper.h
+++ b/paddle/fluid/framework/fleet/box_wrapper.h
@@ -266,7 +266,7 @@ class AfsManager {
       fcntl(fd_read[0], F_SETFD, FD_CLOEXEC);
       fp_read = fdopen(fd_read[0], "r");
       PADDLE_ENFORCE_NE(
-          fp_read, 0,
+          fp_read, nullptr,
           platform::errors::External(
               "Failed to open file descriptor via fdopen in AfsManager."));
     }
@@ -276,7 +276,7 @@ class AfsManager {
       fcntl(fd_write[1], F_SETFD, FD_CLOEXEC);
       fp_write = fdopen(fd_write[1], "w");
       PADDLE_ENFORCE_NE(
-          fp_write, 0,
+          fp_write, nullptr,
           platform::errors::External(
               "Failed to open file descriptor via fdopen in AfsManager."));
     }
diff --git a/python/paddle/fluid/tests/unittests/test_boxps.py b/python/paddle/fluid/tests/unittests/test_boxps.py
index 88a6e5e9be..a61cc9747e 100644
--- a/python/paddle/fluid/tests/unittests/test_boxps.py
+++ b/python/paddle/fluid/tests/unittests/test_boxps.py
@@ -105,6 +105,7 @@ class TestBoxPSPreload(unittest.TestCase):
                 name='x', shape=[1], dtype='int64', lod_level=0)
             y = fluid.layers.data(
                 name='y', shape=[1], dtype='int64', lod_level=0)
+            z = layers.data(name='z', shape=[1], dtype='int64')
             emb_x, emb_y = _pull_box_sparse([x, y], size=2)
             emb_xp = _pull_box_sparse(x, size=2)
             concat = layers.concat([emb_x, emb_y], axis=1)
@@ -114,7 +115,6 @@ class TestBoxPSPreload(unittest.TestCase):
                            num_flatten_dims=1,
                            bias_attr=False)
             loss = layers.reduce_mean(fc)
-            layers.Print(loss)
             place = fluid.CPUPlace(
             ) if is_cpu or not core.is_compiled_with_cuda(
             ) else fluid.CUDAPlace(0)
@@ -161,8 +161,14 @@ class TestBoxPSPreload(unittest.TestCase):
                 sync_steps=-1)
             optimizer.minimize(loss)
 
-            program._pipeline_opt[
-                "dump_fields"] = ["fc.tmp_0", "fc.tmp_0@GRAD", "hehe"]
+            program._pipeline_opt["dump_fields"] = [
+                "fc.tmp_0", "fc.tmp_0@GRAD", "fake_var", "z",
+                "reduce_mean_3.tmp_0"
+            ]
+            # fake_var: not in scope
+            # z: in scope, but not initialized
+            # reduce_mean_3.tmp_0: dimension is not right
+
             program._pipeline_opt["dump_fields_path"] = "./dump_log/"
             program._pipeline_opt["dump_param"] = ["fc.w_0"]
             program._pipeline_opt["enable_random_dump"] = True
-- 
GitLab
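
Note on the DumpField change above: the old code wrote tensor->dims()[1] as the field length, which is only correct for dense tensors; the patched code writes bound.second - bound.first, the length of the instance's actual slice, so variable-length LoD fields are reported correctly. Below is a minimal Python sketch of that idea; it is illustrative only, not Paddle's actual GetTensorBound implementation, and it assumes a level-0 LoD offset table and a 2-D shape.

    # Illustrative sketch only -- not the actual Paddle implementation.
    def tensor_bound(lod, dims, index):
        """Return the flattened [begin, end) range of instance `index`.

        lod: level-0 offset table ([] for a dense tensor); dims: tensor shape.
        """
        width = dims[1] if len(dims) > 1 else 1
        if lod:  # LoD tensor: each instance owns a variable number of rows
            return lod[index] * width, lod[index + 1] * width
        return index * width, (index + 1) * width  # dense: fixed width per instance

    # lod [0, 3, 5]: instance 0 owns rows 0..3, instance 1 owns rows 3..5.
    begin, end = tensor_bound(lod=[0, 3, 5], dims=[5, 1], index=0)
    assert end - begin == 3  # dumped length is 3, whereas dims[1] would report 1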