Unverified commit 55b15db5, authored by chengduo, committed by GitHub

Add unit test for fuse all_reduce ops (#16699)

* test fuse all_reduce
Parent: ad4a1bd1
@@ -12,17 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include <algorithm>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/details/build_strategy.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
-DEFINE_uint32(fuse_parameter_memory_size, 0,  // 0 KB
+DEFINE_uint64(fuse_parameter_memory_size, 0,  // 0 KB
               "fuse_parameter_memory_size is up limited memory size "
               "of one group parameters' gradient which is the input "
               "of communication calling(e.g NCCLAllReduce). "
@@ -40,14 +41,28 @@ DEFINE_int32(
 namespace paddle {
 namespace framework {
 namespace details {
+
+// SetFuseParameterGroupsSize and SetFuseParameterMemorySize are used in unit
+// tests, because setting 'FLAGS_fuse_parameter_memory_size' and
+// 'FLAGS_fuse_parameter_groups_size' directly is invalid in a unit test.
+void SetFuseParameterGroupsSize(int group_size) {
+  FLAGS_fuse_parameter_groups_size = group_size;
+}
+
+int GetFuseParameterGroupsSize() { return FLAGS_fuse_parameter_groups_size; }
+
+void SetFuseParameterMemorySize(uint64_t memory_size) {
+  FLAGS_fuse_parameter_memory_size = memory_size;
+}
+
+uint64_t GetFuseParameterMemorySize() {
+  return FLAGS_fuse_parameter_memory_size;
+}
+
 static const char kUnKnow[] = "@UNKNOW@";
 static framework::proto::VarType::Type kDefaultDtype =
     framework::proto::VarType::Type::VarType_Type_BOOL;
 
-class AllocContinuousSpaceForGradPass : public ir::Pass {
- protected:
-  void ApplyImpl(ir::Graph *graph) const override {
+void AllocContinuousSpaceForGradPass::ApplyImpl(ir::Graph *graph) const {
   ir::Graph &result = *graph;
 
   auto &places = Get<const std::vector<platform::Place>>(kPlaces);
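Side note (not part of the diff): the setters added above exist because, presumably, FLAGS_* values are consumed when paddle.fluid is first imported, so a unit test cannot reliably change them through the environment afterwards; the code comment only states that setting the flags directly in a unit test is invalid. A minimal sketch of the old and new test setup, using the bindings added in the PYBIND11_MODULE(core, m) hunk further down:

    # Illustrative only; mirrors the removed/added lines in the Python test below.
    # Old approach: the FLAGS had to be set in the environment before the import.
    #   import os
    #   os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
    #   os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
    #   import paddle.fluid as fluid
    # New approach: call the setters through the C++ bindings after importing.
    import paddle.fluid as fluid
    fluid.core._set_fuse_parameter_group_size(3)
    fluid.core._set_fuse_parameter_memory_size(131072)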
@@ -131,29 +146,30 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
                    "%s is duplicate in FusedVars.", fused_var_name);
   fused_var_set.insert(fused_var_name);
 
-  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars,
-                                    fused_var_name, params_grads);
+  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars, fused_var_name,
+                                    params_grads);
 }
 
 template <typename AttrType>
-void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const {
+void AllocContinuousSpaceForGradPass::ResetAttribute(
+    const std::string &attr_name, ir::Graph *graph) const {
   if (graph->Has(attr_name)) {
     VLOG(10) << attr_name << " is reset.";
     graph->Erase(attr_name);
   }
   graph->Set(attr_name, new AttrType);
 }
 
-void SetGroupGradsAndParams(
+void AllocContinuousSpaceForGradPass::SetGroupGradsAndParams(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     const ParamsAndGrads &params_grads,
     GroupGradsAndParams *group_grads_params) const {
   SetGroupAccordingToLayers(var_nodes, params_grads, group_grads_params);
   SetGroupAccordingToMemorySize(var_nodes, group_grads_params);
   SetGroupAccordingToGroupSize(var_nodes, group_grads_params);
 }
 
-void SetGroupAccordingToLayers(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToLayers(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     const ParamsAndGrads &params_grads,
     GroupGradsAndParams *group_grads_params) const {
@@ -196,18 +212,16 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
-void SetGroupAccordingToMemorySize(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToMemorySize(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     GroupGradsAndParams *group_grads_params) const {
-  if (FLAGS_fuse_parameter_memory_size == 0) {
+  const uint64_t group_memory_size = GetFuseParameterMemorySize();
+  if (group_memory_size == 0) {
     return;
   }
-  size_t group_memory_size =
-      static_cast<size_t>(FLAGS_fuse_parameter_memory_size);
   GroupGradsAndParams local_group_grads_params;
   size_t j = 0;
   while (j < group_grads_params->size()) {
     local_group_grads_params.emplace_back();
@@ -239,9 +253,8 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
   std::swap(*group_grads_params, local_group_grads_params);
 
-  VLOG(10) << string::Sprintf(
-      "SetGroupAccordingToMemorySize(memory_size: %d):",
-      FLAGS_fuse_parameter_memory_size);
+  VLOG(10) << string::Sprintf("SetGroupAccordingToMemorySize(memory_size: %d):",
+                              group_memory_size);
   for (size_t i = 0; i < group_grads_params->size(); ++i) {
     VLOG(10) << "group " << i;
     std::stringstream out;
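To make the memory-size rule concrete, here is a simplified Python sketch of the grouping behaviour (an approximation for illustration, not the pass's actual implementation; it assumes gradient sizes are already known in bytes): consecutive gradients are packed into one group until the accumulated size reaches GetFuseParameterMemorySize(), then a new group is started.

    # Simplified sketch of grouping by memory size (assumption: sizes in bytes).
    def group_by_memory_size(grad_sizes, group_memory_size):
        groups, current, accumulated = [], [], 0
        for name, size in grad_sizes:
            current.append(name)
            accumulated += size
            if accumulated >= group_memory_size:
                groups.append(current)   # limit reached: close this group
                current, accumulated = [], 0
        if current:
            groups.append(current)       # flush the tail group
        return groups

    # Three ~70 KB gradients with a 128 KB limit end up in two groups:
    print(group_by_memory_size(
        [("fc_0.w_0@GRAD", 71680), ("fc_1.w_0@GRAD", 71680),
         ("fc_2.w_0@GRAD", 71680)], 131072))
    # [['fc_0.w_0@GRAD', 'fc_1.w_0@GRAD'], ['fc_2.w_0@GRAD']]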
@@ -256,18 +269,17 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
-void SetGroupAccordingToGroupSize(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToGroupSize(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    GroupGradsAndParams *group_grads_params) const {
-  if (FLAGS_fuse_parameter_groups_size == 1) {
+  if (GetFuseParameterGroupsSize() == 1) {
     return;
   }
-  size_t group_size = static_cast<size_t>(FLAGS_fuse_parameter_groups_size);
-  if (FLAGS_fuse_parameter_groups_size == -1) {
-    group_size = group_grads_params->size();
-  }
+  const int group_size = GetFuseParameterGroupsSize() == -1
+                             ? static_cast<int>(group_grads_params->size())
+                             : GetFuseParameterGroupsSize();
   PADDLE_ENFORCE_GT(group_size, 1);
   size_t groups = (group_grads_params->size() + group_size - 1) / group_size;
   GroupGradsAndParams local_group_grads_params;
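For the group-size rule, the arithmetic in the hunk above works out as follows (an illustrative note, not taken from the hidden test):

    # Sketch of the group-size rule from the C++ above:
    #   groups = (num_entries + group_size - 1) / group_size   (ceiling division)
    # group_size == -1 means "merge everything into a single group".
    num_entries, group_size = 10, 3
    groups = (num_entries + group_size - 1) // group_size
    print(groups)  # 4 fused groups for 10 entries with group_size 3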
@@ -287,8 +299,8 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
   }
   std::swap(*group_grads_params, local_group_grads_params);
 
-  VLOG(10) << "SetGroupAccordingToGroupSize(group_size: " << group_size
-           << "): ";
+  VLOG(10) << string::Sprintf("SetGroupAccordingToGroupSize(group_size: %d):",
+                              group_size);
   for (size_t i = 0; i < group_grads_params->size(); ++i) {
     VLOG(10) << "group " << i;
     std::stringstream out;
@@ -297,16 +309,16 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
- private:
-  bool IsSupportedVarType(const proto::VarType::Type &type) const {
+bool AllocContinuousSpaceForGradPass::IsSupportedVarType(
+    const proto::VarType::Type &type) const {
   // Current only support LOD_TENSOR.
   return type == proto::VarType::LOD_TENSOR;
 }
 
-void RecordParamsAndGrads(ir::Node *node,
-                          ParamsAndGrads *params_grads) const {
+void AllocContinuousSpaceForGradPass::RecordParamsAndGrads(
+    ir::Node *node, ParamsAndGrads *params_grads) const {
   try {
     bool is_bk_op =
         static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
@@ -325,14 +337,14 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
       VLOG(10) << "Trainable parameter: " << backward_vars[i]
               << ", gradient: " << backward_vars[i + 1];
 
-      params_grads->emplace_back(std::make_pair(
-          backward_vars[i] /*param*/, backward_vars[i + 1] /*grad*/));
+      params_grads->emplace_back(std::make_pair(backward_vars[i] /*param*/,
+                                                backward_vars[i + 1] /*grad*/));
     }
   } catch (boost::bad_get e) {
   }
 }
 
-void InitFusedVarsAndAllocSpaceForVars(
+void AllocContinuousSpaceForGradPass::InitFusedVarsAndAllocSpaceForVars(
     const std::vector<platform::Place> &places,
     const std::vector<Scope *> &local_scopes,
     const std::unordered_map<std::string, ir::Node *> &vars,
@@ -376,19 +388,18 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
       op->Run(*local_scopes[i], places[i]);
     }
   }
 }
 
-void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
-                               const std::vector<std::string> &grads_name,
-                               const std::string &fused_var_name,
-                               BlockDesc *global_block) const {
+void AllocContinuousSpaceForGradPass::AppendAllocSpaceForVarsOp(
+    const std::vector<std::string> &params_name,
+    const std::vector<std::string> &grads_name,
+    const std::string &fused_var_name, BlockDesc *global_block) const {
   auto op_desc = global_block->AppendOp();
   op_desc->SetType("alloc_continuous_space");
   op_desc->SetInput("Input", params_name);
   op_desc->SetOutput("Output", grads_name);
   op_desc->SetOutput("FusedOutput", {fused_var_name});
 }
-};
 
 }  // namespace details
 }  // namespace framework
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace paddle {
namespace framework {
namespace details {
void SetFuseParameterGroupsSize(int group_size);
int GetFuseParameterGroupsSize();
void SetFuseParameterMemorySize(uint64_t memory_size);
uint64_t GetFuseParameterMemorySize();
class AllocContinuousSpaceForGradPass : public ir::Pass {
protected:
void ApplyImpl(ir::Graph *graph) const override;
template <typename AttrType>
void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const;
void SetGroupGradsAndParams(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
const ParamsAndGrads &params_grads,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToLayers(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
const ParamsAndGrads &params_grads,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToMemorySize(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToGroupSize(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
GroupGradsAndParams *group_grads_params) const;
private:
bool IsSupportedVarType(const proto::VarType::Type &type) const;
void RecordParamsAndGrads(ir::Node *node, ParamsAndGrads *params_grads) const;
void InitFusedVarsAndAllocSpaceForVars(
const std::vector<platform::Place> &places,
const std::vector<Scope *> &local_scopes,
const std::unordered_map<std::string, ir::Node *> &vars,
const std::string &fused_var_name,
const ParamsAndGrads &params_grads) const;
void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
const std::vector<std::string> &grads_name,
const std::string &fused_var_name,
BlockDesc *global_block) const;
};
} // namespace details
} // namespace framework
} // namespace paddle
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/framework.pb.h"
@@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) {
   // to enable eager deletion mode in unittest.
   m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode);
 
+  m.def("_set_fuse_parameter_group_size",
+        &paddle::framework::details::SetFuseParameterGroupsSize);
+  m.def("_set_fuse_parameter_memory_size",
+        &paddle::framework::details::SetFuseParameterMemorySize);
+
   m.add_object("_cleanup",
                py::capsule([]() { ScopePool::Instance().Clear(); }));
...
@@ -14,10 +14,11 @@
 from __future__ import print_function
 
 import os
-os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
-os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
 import paddle.fluid as fluid
+fluid.core._set_fuse_parameter_group_size(3)
+fluid.core._set_fuse_parameter_memory_size(131072)
+
 import paddle.fluid.layers.ops as ops
 from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
...
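The test body itself is truncated in this view. For orientation only, here is a minimal sketch of how a fuse-all_reduce unit test is typically structured (the network, seeds, and the assumption that the BuildStrategy option is named fuse_all_reduce_ops are illustrative, not the content of the hidden test): run the same model once with fusion disabled and once enabled, and check that the losses agree.

    # Illustrative sketch only; APIs and network are assumptions for orientation.
    import numpy as np
    import paddle.fluid as fluid

    def build_and_run(fuse_all_reduce_ops, batch):
        main, startup = fluid.Program(), fluid.Program()
        main.random_seed = startup.random_seed = 1  # keep both runs identical
        with fluid.program_guard(main, startup):
            img = fluid.layers.data(name='img', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            hidden = fluid.layers.fc(input=img, size=100, act='relu')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
            loss = fluid.layers.mean(
                fluid.layers.cross_entropy(input=prediction, label=label))
            fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup)

        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = fuse_all_reduce_ops
        compiled = fluid.CompiledProgram(main).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        return exe.run(compiled, feed=batch, fetch_list=[loss.name])[0]

    batch = {'img': np.random.random((32, 784)).astype('float32'),
             'label': np.random.randint(0, 10, size=(32, 1)).astype('int64')}
    not_fused = build_and_run(False, batch)
    fused = build_and_run(True, batch)
    np.testing.assert_allclose(not_fused, fused, rtol=1e-5)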