Unverified commit 55b15db5, authored by chengduo, committed by GitHub

Add unit test for fuse all_reduce ops (#16699)

* test fuse all_reduce
Parent: ad4a1bd1
@@ -12,17 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include <algorithm>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/details/build_strategy.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
-DEFINE_uint32(fuse_parameter_memory_size, 0,  // 0 KB
+DEFINE_uint64(fuse_parameter_memory_size, 0,  // 0 KB
               "fuse_parameter_memory_size is up limited memory size "
               "of one group parameters' gradient which is the input "
               "of communication calling(e.g NCCLAllReduce). "
@@ -40,14 +41,28 @@ DEFINE_int32(
 namespace paddle {
 namespace framework {
 namespace details {
+
+// SetFuseParameterGroupsSize and SetFuseParameterMemorySize are used in unit
+// tests, because setting 'FLAGS_fuse_parameter_memory_size' and
+// 'FLAGS_fuse_parameter_groups_size' directly is invalid in a unit test.
+void SetFuseParameterGroupsSize(int group_size) {
+  FLAGS_fuse_parameter_groups_size = group_size;
+}
+
+int GetFuseParameterGroupsSize() { return FLAGS_fuse_parameter_groups_size; }
+
+void SetFuseParameterMemorySize(uint64_t memory_size) {
+  FLAGS_fuse_parameter_memory_size = memory_size;
+}
+
+uint64_t GetFuseParameterMemorySize() {
+  return FLAGS_fuse_parameter_memory_size;
+}
+
 static const char kUnKnow[] = "@UNKNOW@";
 static framework::proto::VarType::Type kDefaultDtype =
     framework::proto::VarType::Type::VarType_Type_BOOL;
 
-class AllocContinuousSpaceForGradPass : public ir::Pass {
- protected:
-  void ApplyImpl(ir::Graph *graph) const override {
+void AllocContinuousSpaceForGradPass::ApplyImpl(ir::Graph *graph) const {
   ir::Graph &result = *graph;
 
   auto &places = Get<const std::vector<platform::Place>>(kPlaces);
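Side note (not part of the diff): the setters added above exist because, presumably, FLAGS_* values are consumed when paddle.fluid is first imported, so a unit test cannot reliably change them through the environment afterwards; the code comment only states that setting the flags directly in a unit test is invalid. A minimal sketch of the old and new test setup, using the bindings added in the PYBIND11_MODULE(core, m) hunk further down:

    # Illustrative only; mirrors the removed/added lines in the Python test below.
    # Old approach: the FLAGS had to be set in the environment before the import.
    #   import os
    #   os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
    #   os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
    #   import paddle.fluid as fluid
    # New approach: call the setters through the C++ bindings after importing.
    import paddle.fluid as fluid
    fluid.core._set_fuse_parameter_group_size(3)
    fluid.core._set_fuse_parameter_memory_size(131072)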
@@ -131,29 +146,30 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
                    "%s is duplicate in FusedVars.", fused_var_name);
   fused_var_set.insert(fused_var_name);
 
-  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars,
-                                    fused_var_name, params_grads);
+  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars, fused_var_name,
+                                    params_grads);
 }
 
 template <typename AttrType>
-void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const {
+void AllocContinuousSpaceForGradPass::ResetAttribute(
+    const std::string &attr_name, ir::Graph *graph) const {
   if (graph->Has(attr_name)) {
     VLOG(10) << attr_name << " is reset.";
     graph->Erase(attr_name);
   }
   graph->Set(attr_name, new AttrType);
 }
 
-void SetGroupGradsAndParams(
+void AllocContinuousSpaceForGradPass::SetGroupGradsAndParams(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     const ParamsAndGrads &params_grads,
     GroupGradsAndParams *group_grads_params) const {
   SetGroupAccordingToLayers(var_nodes, params_grads, group_grads_params);
   SetGroupAccordingToMemorySize(var_nodes, group_grads_params);
   SetGroupAccordingToGroupSize(var_nodes, group_grads_params);
 }
 
-void SetGroupAccordingToLayers(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToLayers(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     const ParamsAndGrads &params_grads,
     GroupGradsAndParams *group_grads_params) const {
@@ -196,18 +212,16 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
-void SetGroupAccordingToMemorySize(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToMemorySize(
     const std::unordered_map<std::string, ir::Node *> &var_nodes,
     GroupGradsAndParams *group_grads_params) const {
-  if (FLAGS_fuse_parameter_memory_size == 0) {
+  const uint64_t group_memory_size = GetFuseParameterMemorySize();
+  if (group_memory_size == 0) {
     return;
   }
-  size_t group_memory_size =
-      static_cast<size_t>(FLAGS_fuse_parameter_memory_size);
   GroupGradsAndParams local_group_grads_params;
   size_t j = 0;
   while (j < group_grads_params->size()) {
     local_group_grads_params.emplace_back();
@@ -239,9 +253,8 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
   std::swap(*group_grads_params, local_group_grads_params);
 
-  VLOG(10) << string::Sprintf(
-      "SetGroupAccordingToMemorySize(memory_size: %d):",
-      FLAGS_fuse_parameter_memory_size);
+  VLOG(10) << string::Sprintf("SetGroupAccordingToMemorySize(memory_size: %d):",
+                              group_memory_size);
   for (size_t i = 0; i < group_grads_params->size(); ++i) {
     VLOG(10) << "group " << i;
     std::stringstream out;
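To make the memory-size rule concrete, here is a simplified Python sketch of the grouping behaviour (an approximation for illustration, not the pass's actual implementation; it assumes gradient sizes are already known in bytes): consecutive gradients are packed into one group until the accumulated size reaches GetFuseParameterMemorySize(), then a new group is started.

    # Simplified sketch of grouping by memory size (assumption: sizes in bytes).
    def group_by_memory_size(grad_sizes, group_memory_size):
        groups, current, accumulated = [], [], 0
        for name, size in grad_sizes:
            current.append(name)
            accumulated += size
            if accumulated >= group_memory_size:
                groups.append(current)   # limit reached: close this group
                current, accumulated = [], 0
        if current:
            groups.append(current)       # flush the tail group
        return groups

    # Three ~70 KB gradients with a 128 KB limit end up in two groups:
    print(group_by_memory_size(
        [("fc_0.w_0@GRAD", 71680), ("fc_1.w_0@GRAD", 71680),
         ("fc_2.w_0@GRAD", 71680)], 131072))
    # [['fc_0.w_0@GRAD', 'fc_1.w_0@GRAD'], ['fc_2.w_0@GRAD']]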
@@ -256,18 +269,17 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
-void SetGroupAccordingToGroupSize(
+void AllocContinuousSpaceForGradPass::SetGroupAccordingToGroupSize(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    GroupGradsAndParams *group_grads_params) const {
-  if (FLAGS_fuse_parameter_groups_size == 1) {
+  if (GetFuseParameterGroupsSize() == 1) {
     return;
   }
-  size_t group_size = static_cast<size_t>(FLAGS_fuse_parameter_groups_size);
-  if (FLAGS_fuse_parameter_groups_size == -1) {
-    group_size = group_grads_params->size();
-  }
+  const int group_size = GetFuseParameterGroupsSize() == -1
+                             ? static_cast<int>(group_grads_params->size())
+                             : GetFuseParameterGroupsSize();
   PADDLE_ENFORCE_GT(group_size, 1);
   size_t groups = (group_grads_params->size() + group_size - 1) / group_size;
   GroupGradsAndParams local_group_grads_params;
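For the group-size rule, the arithmetic in the hunk above works out as follows (an illustrative note, not taken from the hidden test):

    # Sketch of the group-size rule from the C++ above:
    #   groups = (num_entries + group_size - 1) / group_size   (ceiling division)
    # group_size == -1 means "merge everything into a single group".
    num_entries, group_size = 10, 3
    groups = (num_entries + group_size - 1) // group_size
    print(groups)  # 4 fused groups for 10 entries with group_size 3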
@@ -287,8 +299,8 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
   }
   std::swap(*group_grads_params, local_group_grads_params);
 
-  VLOG(10) << "SetGroupAccordingToGroupSize(group_size: " << group_size
-           << "): ";
+  VLOG(10) << string::Sprintf("SetGroupAccordingToGroupSize(group_size: %d):",
+                              group_size);
   for (size_t i = 0; i < group_grads_params->size(); ++i) {
     VLOG(10) << "group " << i;
     std::stringstream out;
@@ -297,16 +309,16 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
     }
     VLOG(10) << out.str();
   }
 }
 
- private:
-  bool IsSupportedVarType(const proto::VarType::Type &type) const {
+bool AllocContinuousSpaceForGradPass::IsSupportedVarType(
+    const proto::VarType::Type &type) const {
   // Current only support LOD_TENSOR.
   return type == proto::VarType::LOD_TENSOR;
 }
 
-void RecordParamsAndGrads(ir::Node *node,
-                          ParamsAndGrads *params_grads) const {
+void AllocContinuousSpaceForGradPass::RecordParamsAndGrads(
+    ir::Node *node, ParamsAndGrads *params_grads) const {
   try {
     bool is_bk_op =
         static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
@@ -325,14 +337,14 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
       VLOG(10) << "Trainable parameter: " << backward_vars[i]
               << ", gradient: " << backward_vars[i + 1];
 
-      params_grads->emplace_back(std::make_pair(
-          backward_vars[i] /*param*/, backward_vars[i + 1] /*grad*/));
+      params_grads->emplace_back(std::make_pair(backward_vars[i] /*param*/,
+                                                backward_vars[i + 1] /*grad*/));
     }
   } catch (boost::bad_get e) {
   }
 }
 
-void InitFusedVarsAndAllocSpaceForVars(
+void AllocContinuousSpaceForGradPass::InitFusedVarsAndAllocSpaceForVars(
     const std::vector<platform::Place> &places,
     const std::vector<Scope *> &local_scopes,
     const std::unordered_map<std::string, ir::Node *> &vars,
@@ -376,19 +388,18 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
       op->Run(*local_scopes[i], places[i]);
     }
   }
 }
 
-void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
-                               const std::vector<std::string> &grads_name,
-                               const std::string &fused_var_name,
-                               BlockDesc *global_block) const {
+void AllocContinuousSpaceForGradPass::AppendAllocSpaceForVarsOp(
+    const std::vector<std::string> &params_name,
+    const std::vector<std::string> &grads_name,
+    const std::string &fused_var_name, BlockDesc *global_block) const {
   auto op_desc = global_block->AppendOp();
   op_desc->SetType("alloc_continuous_space");
   op_desc->SetInput("Input", params_name);
   op_desc->SetOutput("Output", grads_name);
   op_desc->SetOutput("FusedOutput", {fused_var_name});
 }
-};
 
 }  // namespace details
 }  // namespace framework
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace paddle {
namespace framework {
namespace details {
void SetFuseParameterGroupsSize(int group_size);
int GetFuseParameterGroupsSize();
void SetFuseParameterMemorySize(uint64_t memory_size);
uint64_t GetFuseParameterMemorySize();
class AllocContinuousSpaceForGradPass : public ir::Pass {
protected:
void ApplyImpl(ir::Graph *graph) const override;
template <typename AttrType>
void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const;
void SetGroupGradsAndParams(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
const ParamsAndGrads &params_grads,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToLayers(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
const ParamsAndGrads &params_grads,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToMemorySize(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
GroupGradsAndParams *group_grads_params) const;
void SetGroupAccordingToGroupSize(
const std::unordered_map<std::string, ir::Node *> &var_nodes,
GroupGradsAndParams *group_grads_params) const;
private:
bool IsSupportedVarType(const proto::VarType::Type &type) const;
void RecordParamsAndGrads(ir::Node *node, ParamsAndGrads *params_grads) const;
void InitFusedVarsAndAllocSpaceForVars(
const std::vector<platform::Place> &places,
const std::vector<Scope *> &local_scopes,
const std::unordered_map<std::string, ir::Node *> &vars,
const std::string &fused_var_name,
const ParamsAndGrads &params_grads) const;
void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
const std::vector<std::string> &grads_name,
const std::string &fused_var_name,
BlockDesc *global_block) const;
};
} // namespace details
} // namespace framework
} // namespace paddle
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/framework.pb.h"
@@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) {
   // to enable eager deletion mode in unittest.
   m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode);
 
+  m.def("_set_fuse_parameter_group_size",
+        &paddle::framework::details::SetFuseParameterGroupsSize);
+  m.def("_set_fuse_parameter_memory_size",
+        &paddle::framework::details::SetFuseParameterMemorySize);
+
   m.add_object("_cleanup",
                py::capsule([]() { ScopePool::Instance().Clear(); }));
...
@@ -14,10 +14,11 @@
 from __future__ import print_function
 
 import os
-os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
-os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
 import paddle.fluid as fluid
+fluid.core._set_fuse_parameter_group_size(3)
+fluid.core._set_fuse_parameter_memory_size(131072)
+
 import paddle.fluid.layers.ops as ops
 from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
...
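The test body itself is truncated in this view. For orientation only, here is a minimal sketch of how a fuse-all_reduce unit test is typically structured (the network, seeds, and the assumption that the BuildStrategy option is named fuse_all_reduce_ops are illustrative, not the content of the hidden test): run the same model once with fusion disabled and once enabled, and check that the losses agree.

    # Illustrative sketch only; APIs and network are assumptions for orientation.
    import numpy as np
    import paddle.fluid as fluid

    def build_and_run(fuse_all_reduce_ops, batch):
        main, startup = fluid.Program(), fluid.Program()
        main.random_seed = startup.random_seed = 1  # keep both runs identical
        with fluid.program_guard(main, startup):
            img = fluid.layers.data(name='img', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            hidden = fluid.layers.fc(input=img, size=100, act='relu')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
            loss = fluid.layers.mean(
                fluid.layers.cross_entropy(input=prediction, label=label))
            fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup)

        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = fuse_all_reduce_ops
        compiled = fluid.CompiledProgram(main).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        return exe.run(compiled, feed=batch, fetch_list=[loss.name])[0]

    batch = {'img': np.random.random((32, 784)).astype('float32'),
             'label': np.random.randint(0, 10, size=(32, 1)).astype('int64')}
    not_fused = build_and_run(False, batch)
    fused = build_and_run(True, batch)
    np.testing.assert_allclose(not_fused, fused, rtol=1e-5)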