提交 2a05b3d5 编写于 作者: T tangwei12

delete checkpoint function

上级 e21a72d1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/operators/detail/checkpoint.h"
#include <string>
namespace paddle {
namespace framework {
namespace details {
Checkpoint::Save(const framework::Scope& scope, const platform::Place& place,
const std::string& save_dir, const std::string& var_name,
const bool& overwrite) {
auto* var = scope.FindVar(var_name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s for save_op",
var_name);
PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(),
"Checkpoint only supports LoDTensor, %s has wrong type",
var_name);
bool is_present = FileExists(save_dir);
if (is_present && !overwrite) {
PADDLE_THROW("%s exists!, checkpoint cannot write it when overwrite=false",
save_dir, overwrite);
}
MkDirRecursively(DirName(save_dir).c_str());
std::ofstream fout(save_dir);
PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write", save_dir);
// get device context from pool
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& dev_ctx = *pool.Get(place);
auto& tensor = var->Get<framework::LoDTensor>();
// Serialize tensor
framework::SerializeToStream(fout, tensor, dev_ctx);
fout.close();
}
} // namespace details
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace framework {
namespace details {
// Groups static checkpoint helpers; the class declares no data members and
// is used purely as a scope for the functions below.
class Checkpoint {
 public:
  // Writes the variable `var_name` found in `scope` to the path `save_dir`.
  // `place` selects the device context used during serialization, and
  // `overwrite` controls whether an already existing target may be replaced.
  static void Save(const framework::Scope& scope, const platform::Place& place,
                   const std::string& save_dir, const std::string& var_name,
                   const bool& overwrite);

  // Declared counterpart to Save.
  // NOTE(review): no definition is visible in this view; confirm one exists
  // before calling it.
  static void Load();
};
} // namespace details
} // namespace framework
} // namespace paddle
......@@ -101,7 +101,6 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor,
framework::Scope *recv_scope,
framework::BlockDesc *prefetch_block) const {
auto fan_in = Attr<int>("Fanin");
auto checkpoint = Attr<std::string>("Checkpoint");
size_t num_blocks = program->Size();
PADDLE_ENFORCE_GE(num_blocks, 2,
......@@ -190,17 +189,6 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor,
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
}
/******************** CHECK POINT ***********************/
std::vector<std::string> all_vars = recv_scope.LocalVarNames();
std::vector<std::string>::iterator it;
for (it = all_vars.begin(); it != all_vars.end(); it++) {
VLOG(2) << "Checkpoint Var: " << *it;
break;
}
/******************** CHECK POINT ***********************/
rpc_service_->SetCond(1);
// FIXME(typhoonzero): use another condition to sync wait clients get.
rpc_service_->WaitClientGet(fan_in);
......
......@@ -483,8 +483,7 @@ class DistributeTranspiler:
"Fanin": self.trainer_num,
"PrefetchBlock": prefetch_block,
"sync_mode": self.sync_mode,
"grad_to_block_id": grad_to_block_id,
"Checkpoint": "/tmp/tangwei_ckpt/"
"grad_to_block_id": grad_to_block_id
})
pserver_program.sync_with_cpp()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册