From fdd24939104093877c6cbac4a599f6559631b368 Mon Sep 17 00:00:00 2001
From: Thunderbrook <52529258+Thunderbrook@users.noreply.github.com>
Date: Wed, 2 Sep 2020 14:55:53 +0800
Subject: [PATCH] fix eigen in push sparse; fix hadoop command (#26872) (#26908)

* fix eigen in push sparse; fix hadoop command
test=develop

* add log in load_combine_op
test=develop
---
 paddle/fluid/framework/data_set.cc            | 3 ++-
 paddle/fluid/framework/fleet/fleet_wrapper.cc | 2 +-
 paddle/fluid/operators/load_combine_op.h      | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/data_set.cc b/paddle/fluid/framework/data_set.cc
index 0684d5674ad..d57988b89e9 100644
--- a/paddle/fluid/framework/data_set.cc
+++ b/paddle/fluid/framework/data_set.cc
@@ -92,9 +92,10 @@ void DatasetImpl<T>::SetHdfsConfig(const std::string& fs_name,
                                    const std::string& fs_ugi) {
   fs_name_ = fs_name;
   fs_ugi_ = fs_ugi;
-  std::string cmd = std::string("hadoop fs");
+  std::string cmd = std::string("$HADOOP_HOME/bin/hadoop fs");
   cmd += " -D fs.default.name=" + fs_name;
   cmd += " -D hadoop.job.ugi=" + fs_ugi;
+  cmd += " -Ddfs.client.block.write.retries=15 -Ddfs.rpc.timeout=500000";
   paddle::framework::hdfs_set_command(cmd);
 }
 
diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.cc b/paddle/fluid/framework/fleet/fleet_wrapper.cc
index 135fc407943..781518844a9 100644
--- a/paddle/fluid/framework/fleet/fleet_wrapper.cc
+++ b/paddle/fluid/framework/fleet/fleet_wrapper.cc
@@ -589,7 +589,7 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
     float* g = g_tensor->data<float>();
 
     if (scale_sparse_gradient_with_batch_size_ && grad_dim > 0) {
-      int dim = emb_dim + offset;
+      int dim = emb_dim;
       Eigen::Map<
           Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
           g_mat(g, g_tensor->numel() / dim, dim);
diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h
index 1b4db94b298..589df8821b3 100644
--- a/paddle/fluid/operators/load_combine_op.h
+++ b/paddle/fluid/operators/load_combine_op.h
@@ -70,6 +70,7 @@ class LoadCombineOpKernel : public framework::OpKernel<T> {
     auto out_vars = context.MultiOutputVar("Out");
 
     for (size_t i = 0; i < out_var_names.size(); i++) {
+      VLOG(4) << "loading tensor: " << out_var_names[i];
       PADDLE_ENFORCE_NOT_NULL(
           out_vars[i], platform::errors::InvalidArgument(
                            "The variable %s to be loaded cannot be found.",
-- 
GitLab