未验证 提交 a2b2af90 编写于 作者: W Wilber 提交者: GitHub

Optimize params sync between CPU and GPU. (#45805)

* enable memory optimization when using fp16.

* optimize params sync between cpu and gpu.
上级 63b6a11b
......@@ -21,9 +21,12 @@
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
......@@ -114,6 +117,28 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
reserve_cpu_weights = true;
}
// Pre-compute the total byte size of all persistable parameters so the
// device memory pool can be warmed up with one large allocation below.
int64_t params_total_bytes{0};
for (auto *node : paddle::framework::ir::TopologySortOperations(graph)) {
if (!node->IsOp()) continue;
// feed/fetch ops carry no weights worth syncing.
if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch") continue;
for (auto *var_node : node->inputs) {
// Only persistable variables are model parameters.
if (!var_node->Var()->Persistable()) continue;
auto var_name = var_node->Var()->Name();
auto *var = scope->FindLocalVar(var_name);
if (var->IsType<framework::LoDTensor>() ||
var->IsType<framework::Tensor>()) {
// NOTE(review): a parameter consumed by several ops is counted once per
// consuming op here (no dedup until `visited` below), so this total may
// over-estimate the real footprint — confirm whether that is intended
// slack for the pool or should be deduplicated.
auto *t = var->GetMutable<framework::LoDTensor>();
params_total_bytes += t->numel() * experimental::SizeOf(t->dtype());
}
}
}
{
// Alloc memory in pool to store all parameters.
// NOTE(review): `ts` is destroyed at the end of this scope; presumably the
// freed chunk stays cached in the allocator's pool so the per-parameter
// device allocations that follow are served from this contiguous region
// instead of hitting the device allocator repeatedly — verify against the
// memory pool's caching/free-list behavior.
framework::Tensor ts;
ts.mutable_data(place, params_total_bytes);
}
// Tracks parameter names already copied, so weights shared by multiple ops
// are synced to the device only once.
std::unordered_set<std::string> visited;
for (auto *node : paddle::framework::ir::TopologySortOperations(graph)) {
if (!node->IsOp()) continue;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册