diff --git a/paddle/fluid/framework/fleet/gloo_wrapper.cc b/paddle/fluid/framework/fleet/gloo_wrapper.cc index 8780db89e854a598f6f77f0cd2989e2463acde4c..e18cad10ac2493fe1e80af87a55378113ea4fe17 100644 --- a/paddle/fluid/framework/fleet/gloo_wrapper.cc +++ b/paddle/fluid/framework/fleet/gloo_wrapper.cc @@ -229,18 +229,18 @@ void ParallelConnectContext::connectFullMesh( store.wait({key}, getTimeout()); std::vector allAddrs; - auto max_retry_times = 5; + auto max_retry_times = 10; // Connect to other side of this pair while (max_retry_times > 0) { allAddrs = store.get(key); - VLOG(3) << "store get all address size: " << allAddrs.size() << " except: " << total_add_size; if (allAddrs.size() == static_cast(total_add_size)) { break; } + sleep(5); --max_retry_times; } @@ -272,11 +272,13 @@ void GlooWrapper::Init() { attr.iface = iface_; std::shared_ptr file_store = nullptr; std::shared_ptr http_store = nullptr; - auto context = std::make_shared(rank_, size_); - context->setTimeout(run_timeout_); auto dev = gloo::transport::tcp::CreateDevice(attr); + switch (store_type_) { case GlooStoreType::HDFS: { + auto context = std::make_shared( + rank_, size_); + context->setTimeout(run_timeout_); std::string cmd = std::string("${HADOOP_HOME}/bin/hadoop fs"); cmd += " -D fs.default.name=" + hdfs_name_; cmd += " -D hadoop.job.ugi=" + hdfs_ugi_; @@ -286,22 +288,25 @@ void GlooWrapper::Init() { auto prefix_store = std::make_shared(prefix_, *file_store); context->connectFullMesh(*prefix_store, dev); + context_ = std::move(context); break; } case GlooStoreType::HTTP: { + auto context = std::make_shared(rank_, size_); + context->setTimeout(run_timeout_); http_store = std::make_shared( http_ip_, http_port_, prefix_ + "_" + http_scope_, rank_); http_store->SetTimeoutSeconds(init_timeout_.count()); context->connectFullMesh(*http_store, dev); http_store->Finalize(); VLOG(3) << "after calling http_store->Finalize."; + context_ = std::move(context); break; } default: LOG(ERROR) << "unknown store type " << store_type_; exit(-1); } - context_ = std::move(context); #endif is_initialized_ = true; VLOG(3) << "gloo initialized done."; diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index f3f8c815b004c45d512af100c0d2f49bbe7d34f8..acb57fc2456ec20692a18623f7815c348975c04e 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -976,7 +976,7 @@ def sparse_embedding(input, 'fluid.contrib.layers.sparse_embedding') check_dtype(dtype, 'dtype', ['float32'], - 'fluid.contrib.layers.sparse_embedding') + 'paddle.static.nn.sparse_embedding') w = helper.create_parameter( attr=helper.param_attr, diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 3bd94fb452785c9b24cf3dedc33d72438f5aa9f1..60daae8667dd6bad1e7f2c5b17afac4cdbd73ed4 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -14,13 +14,37 @@ # TODO: import framework api under this directory __all__ = [ - 'append_backward', 'gradients', 'Executor', 'global_scope', 'scope_guard', - 'BuildStrategy', 'CompiledProgram', 'Print', 'py_func', 'ExecutionStrategy', - 'name_scope', 'ParallelExecutor', 'program_guard', 'WeightNormParamAttr', - 'default_main_program', 'default_startup_program', 'Program', 'data', - 'InputSpec', 'save', 'load', 'save_inference_model', 'load_inference_model', - 'load_program_state', 'set_program_state', 'cpu_places', 'cuda_places', - 'xpu_places', 'Variable' + 'append_backward', + 'gradients', + 'Executor', + 'global_scope', + 'scope_guard', + 'BuildStrategy', + 'CompiledProgram', + 'Print', + 'py_func', + 'ExecutionStrategy', + 'name_scope', + 'ParallelExecutor', + 'program_guard', + 'WeightNormParamAttr', + 'default_main_program', + 'default_startup_program', + 'Program', + 'data', + 'InputSpec', + 'save', + 'load', + 'save_inference_model', + 'load_inference_model', + 'load_program_state', + 'set_program_state', + 'cpu_places', + 'cuda_places', + 'xpu_places', + 'Variable', + 'load_vars', + 'save_vars', ] from . import nn @@ -61,6 +85,10 @@ from ..fluid.io import save #DEFINE_ALIAS from ..fluid.io import load #DEFINE_ALIAS from ..fluid.io import load_program_state #DEFINE_ALIAS from ..fluid.io import set_program_state #DEFINE_ALIAS + +from ..fluid.io import load_vars #DEFINE_ALIAS +from ..fluid.io import save_vars #DEFINE_ALIAS + from ..fluid.layers import create_parameter #DEFINE_ALIAS from ..fluid.layers import create_global_var #DEFINE_ALIAS from ..fluid.layers.metric_op import auc #DEFINE_ALIAS diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 9161bb7af412c36ea740959e89a804883ca3010c..fd84a0a9284ee2d5964a4b85da543745828cd8da 100644 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -38,6 +38,7 @@ __all__ = [ 'spectral_norm', 'switch_case', 'while_loop', + 'sparse_embedding', ] from .common import fc #DEFINE_ALIAS @@ -67,3 +68,4 @@ from ...fluid.layers import switch_case #DEFINE_ALIAS from ...fluid.layers import while_loop #DEFINE_ALIAS from ...fluid.input import embedding #DEFINE_ALIAS +from ...fluid.contrib.layers import sparse_embedding #DEFINE_ALIAS