diff --git a/paddle/fluid/operators/distributed/request_handler_impl.cc b/paddle/fluid/operators/distributed/request_handler_impl.cc index b0d42be388f74a6a8427c39bbb54931df7ef6616..163154c678f65b08981041d647b11f4b2b5860ba 100644 --- a/paddle/fluid/operators/distributed/request_handler_impl.cc +++ b/paddle/fluid/operators/distributed/request_handler_impl.cc @@ -30,7 +30,7 @@ namespace distributed { // define LOOKUP_TABLE_PATH for checkpoint notify to save lookup table variables // to directory specified. -constexpr char LOOKUP_TABLE_PATH[] = "lookup_table_path"; +constexpr char LOOKUP_TABLE_PATH[] = "kLookupTablePath"; bool RequestSendHandler::Handle(const std::string& varname, framework::Scope* scope, @@ -136,7 +136,7 @@ bool RequestCheckpointHandler::Handle(const std::string& varname, auto* lt_var = scope->FindVar(LOOKUP_TABLE_PATH)->GetMutable(); lt_var->clear(); lt_var->append(out_var_name); - VLOG(4) << "RequestCheckpointHandler update var lookup_table_path to: " + VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: " << out_var_name; executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope); return true; diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 6663459939789fb9f5ef3fd46a3ebd1abb9265c0..66c9ed6ddf7dfd48e7ef5ebc2681b96898b65c6c 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -206,7 +206,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, VLOG(3) << "RunAsyncLoop into while"; while (true) { if (rpc_service_->IsExit()) { - LOG(INFO) << "get exit!rpc_processor break!"; + VLOG(4) << "get exit!rpc_processor break!"; break; } @@ -245,11 +245,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE(!rpc_service_); std::string endpoint = Attr("endpoint"); - int checkpoint_notify_block_id = Attr(kCheckpointBlockId); + int checkpoint_block_id = Attr(kCheckpointBlockId); - LOG(INFO) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in - << ", end_point:" << endpoint - << ", CheckpointNotify Id: " << checkpoint_notify_block_id; + VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in + << ", end_point:" << endpoint + << ", checkpoint_block_id: " << checkpoint_block_id; rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in)); @@ -258,7 +258,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, request_prefetch_handler_.reset( new distributed::RequestPrefetchHandler(sync_mode)); request_checkpoint_handler_.reset(new distributed::RequestCheckpointHandler( - sync_mode, checkpoint_notify_block_id)); + sync_mode, checkpoint_block_id)); rpc_service_->RegisterRPC(distributed::kRequestSend, request_send_handler_.get()); @@ -277,8 +277,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, framework::Executor executor(dev_place); std::shared_ptr ckpt_pre_context = nullptr; - if (checkpoint_notify_block_id != -1) { - auto ctx = executor.Prepare(*program, checkpoint_notify_block_id); + if (checkpoint_block_id != -1) { + auto ctx = executor.Prepare(*program, checkpoint_block_id); + // see: https://stackoverflow.com/a/14856553 ckpt_pre_context = std::move(ctx); } @@ -335,7 +336,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, SavePort(); if (sync_mode) { RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list, - checkpoint_notify_block_id); + checkpoint_block_id); } else { RunAsyncLoop(&executor, program); } diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index 493bb2ec890212530f77982b0d20f38dcbd3b514..201a51130d6b6f94104e2dabf9e7facffa672ae0 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -31,7 +31,7 @@ namespace operators { // define LOOKUP_TABLE_PATH for checkpoint notify to save lookup table variables // to directory specified. -constexpr char LOOKUP_TABLE_PATH[] = "lookup_table_path"; +constexpr char LOOKUP_TABLE_PATH[] = "kLookupTablePath"; // TODO(yuyang18): If the functions below are needed by other files, move them // to paddle::filesystem namespace. @@ -138,7 +138,7 @@ class SaveOp : public framework::OperatorBase { auto *lt_var = scope.FindVar(LOOKUP_TABLE_PATH)->GetMutable(); PADDLE_ENFORCE( lt_var != nullptr, - "Can not find variable lookup_table_path for SaveSelectedRows"); + "Can not find variable kLookupTablePath for SaveSelectedRows"); std::string filename = lt_var->data(); VLOG(4) << "SaveSelectedRows get File name: " << filename; diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index d74fdbf17f3260f253132acf4b68ff75aeeaa815..b15ef4cb4c70956092d05de53ce9aaaad00f7bb7 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -920,19 +920,17 @@ class DistributeTranspiler(object): import os pserver_program.global_block().create_var( - name="lookup_table_path", + name="kLookupTablePath", persistable=True, type=core.VarDesc.VarType.RAW) checkpoint_save_block = pserver_program.create_block(pre_block_idx) + # this 'file_path' do not be used in save lookup table variable checkpoint_save_block.append_op( type='save', inputs={'X': [self.table_name]}, outputs={}, - attrs={ - 'file_path': - "this 'file_path' do not be used in save lookup table variable" - }) + attrs={'file_path': ""}) return checkpoint_save_block.idx