Commit 0535db71 authored by Xinqi Li

a better formula to calculate task node time


Former-commit-id: 50cf37fd43c7c343da4f436c4226bd7229ef1a5d
Parent a49e738f
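What changed, as read from the diff below: the demo's inline row/col arithmetic is replaced by a dedicated CalcTaskNodeTime helper, DemoChainGraph gains dense per-regst lookups (RegstId2IsCloned, RegstIIRatio) in place of the old SplitedRegstIds/ClonedRegstIds id lists, and IndexReduce becomes a two-dimensional ColIndexReduce. A sketch of the new time formula, assuming P is the chain-node-by-device placement matrix (rows = chain nodes, columns = devices):

    time_{ij} = \tanh(P_{ij}) \cdot \sum_k ( P_{ik} / \sum_r P_{rk} )

Each column sum \sum_r P_{rk} is device k's total placement mass, P_{ik} / \sum_r P_{rk} is node i's share of device k, and the tanh factor gates the per-node row sum by how strongly node i sits on device j.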
@@ -283,22 +283,23 @@ DemoChainGraph::CalcChainRegstId2PathChainNodeIds(
   return ret;
 }
 
-std::vector<std::vector<int64_t>> DemoChainGraph::SplitedRegstIds() const {
-  std::vector<std::vector<int64_t>> ret;
+std::vector<double> DemoChainGraph::RegstId2IsCloned() const {
+  std::vector<double> ret(regsts_.size());
   for (const auto& regst : regsts_) {
-    if (!regst->IsRegstCloned()) {
-      ret.push_back(std::vector<int64_t>{regst->chain_regst_id()});
-    }
+    ret.at(regst->chain_regst_id()) = (regst->IsRegstCloned() ? 1 : 0);
   }
   return ret;
 }
 
-std::vector<std::vector<int64_t>> DemoChainGraph::ClonedRegstIds() const {
-  std::vector<std::vector<int64_t>> ret;
+std::vector<double> DemoChainGraph::RegstIIRatio(int piece_num_in_batch) const {
+  std::vector<double> ret(regsts_.size());
   for (const auto& regst : regsts_) {
-    if (regst->IsRegstCloned()) {
-      ret.push_back(std::vector<int64_t>{regst->chain_regst_id()});
+    double ii_ratio = 1;
+    if (regst->producer()->task_type() == TaskType::kMdDiffAcc
+        || regst->producer()->task_type() == TaskType::kMdUpdt) {
+      ii_ratio = piece_num_in_batch;
     }
+    ret.at(regst->chain_regst_id()) = ii_ratio;
   }
   return ret;
 }
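Both functions now return dense vectors indexed by chain_regst_id instead of lists of regst ids, presumably so downstream auto-placement code can consume them as tensors. A minimal standalone sketch of the two encodings (the TaskType enum and the two helpers here are hypothetical stand-ins for the real regst graph; the values mirror the simple_with_model test further down):

#include <cassert>
#include <vector>

// Hypothetical subset of the TaskType enum, just for this sketch.
enum class TaskType { kNormal, kMdDiffAcc, kMdUpdt };

// Mirror of RegstId2IsCloned(): 1.0 at cloned regst ids, 0.0 elsewhere.
std::vector<double> RegstId2IsCloned(const std::vector<bool>& is_cloned) {
  std::vector<double> ret(is_cloned.size());
  for (std::size_t id = 0; id < is_cloned.size(); ++id) {
    ret.at(id) = is_cloned[id] ? 1 : 0;
  }
  return ret;
}

// Mirror of RegstIIRatio(): regsts produced by kMdDiffAcc or kMdUpdt tasks
// get ratio piece_num_in_batch; every other regst keeps a ratio of 1.
double IIRatio(TaskType producer_type, int piece_num_in_batch) {
  return (producer_type == TaskType::kMdDiffAcc
          || producer_type == TaskType::kMdUpdt)
             ? piece_num_in_batch
             : 1;
}

int main() {
  // Regsts 1, 4, 5 cloned, as in expected_regst_id2is_cloned below.
  assert((RegstId2IsCloned({false, true, false, false, true, true, false})
          == std::vector<double>{0, 1, 0, 0, 1, 1, 0}));
  assert(IIRatio(TaskType::kMdUpdt, /*piece_num_in_batch=*/4) == 4);
  assert(IIRatio(TaskType::kNormal, /*piece_num_in_batch=*/4) == 1);
  return 0;
}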
@@ -144,8 +144,9 @@ class DemoChainGraph final : public Graph<DemoChainNode, DemoChainEdge> {
         [](int64_t) -> double { return 1; });
   }
 
-  std::vector<std::vector<int64_t>> SplitedRegstIds() const;
-  std::vector<std::vector<int64_t>> ClonedRegstIds() const;
+  std::vector<double> RegstId2IsCloned() const;
+  std::vector<double> RegstIIRatio(int piece_num_in_batch) const;
+
  private:
   friend class DemoChainGraphBuilder;
@@ -25,12 +25,8 @@ TEST(DemoChainGraph, simple_without_model) {
       {0, 2, 3, 5}, {1, 2, 3, 4}, {2, 3}, {2, 3}, {3, 4}, {3, 5}};
   ASSERT_TRUE(graph.CalcChainRegstId2PathChainNodeIds() == expected_path);
-  std::vector<std::vector<int64_t>> expected_splited_regst_ids{{0}, {1}, {2},
-                                                               {3}, {4}, {5}};
-  ASSERT_TRUE(graph.SplitedRegstIds() == expected_splited_regst_ids);
-  std::vector<std::vector<int64_t>> expected_cloned_regst_ids{};
-  ASSERT_TRUE(graph.ClonedRegstIds() == expected_cloned_regst_ids);
+  std::vector<double> expected_regst_id2is_cloned{0, 0, 0, 0, 0, 0};
+  ASSERT_TRUE(graph.RegstId2IsCloned() == expected_regst_id2is_cloned);
 }
 
 TEST(DemoChainGraph, simple_with_model) {
@@ -53,13 +49,8 @@ TEST(DemoChainGraph, simple_with_model) {
       {0, 1, 2, 5}, {4, 1, 2}, {1, 2}, {1, 2}, {2, 3}, {3, 4}, {2, 5}};
   ASSERT_TRUE(graph.CalcChainRegstId2PathChainNodeIds() == expected_path);
-  std::vector<std::vector<int64_t>> expected_splited_regst_ids{
-      {0}, {2}, {3}, {6}};
-  ASSERT_TRUE(graph.SplitedRegstIds() == expected_splited_regst_ids);
-  std::vector<std::vector<int64_t>> expected_cloned_regst_ids{{1}, {4}, {5}};
-  ASSERT_TRUE(graph.ClonedRegstIds() == expected_cloned_regst_ids);
+  std::vector<double> expected_regst_id2is_cloned{0, 1, 0, 0, 1, 1, 0};
+  ASSERT_TRUE(graph.RegstId2IsCloned() == expected_regst_id2is_cloned);
 }
 
 }  // namespace test
#include "oneflow/core/auto_placement/df_func.h"
#include "oneflow/core/auto_placement/demo_chain_graph.h"
namespace oneflow {
@@ -6,8 +7,25 @@ namespace df {
 
 namespace {
 
+Tensor CalcTaskNodeTime(const Tensor& chain_node_placement) {
+  Tensor row_ones(Shape({chain_node_placement.shape().At(0)}), 1);
+  Tensor col_ones(Shape({chain_node_placement.shape().At(1)}), 1);
+  auto placement_copies = Clone(chain_node_placement, 3);
+  Tensor col_sum =
+      TensorProduct(row_ones, MatrixColSum(placement_copies.at(0)));
+  Tensor workload = ElemWiseMul(placement_copies.at(1), Reciprocal(col_sum));
+  Tensor row_sum = TensorProduct(MatrixRowSum(workload), col_ones);
+  return ElemWiseMul(Tanh(placement_copies.at(2)), row_sum);
+}
+
+Tensor CalcMemoryII(const Tensor& chain_node_placement,
+                    const DemoChainGraph& chain_graph) {
+  TODO();
+  return Tensor(0);
+}
+
 void AutoPlacementMemoryDemo() {
-  Tensor var(Shape({4, 4}), [](size_t index) { return index % 2 ? 0 : 100; });
+  Tensor var(Shape({4, 4}), [](size_t index) { return index % 2 ? 0 : 1; });
   Tensor row_ones(Shape({var.shape().At(0)}), 1);
   Tensor col_ones(Shape({var.shape().At(1)}), 1);
   Tensor epsilon(0.000000001);
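A plain-array rendering of CalcTaskNodeTime for checking the arithmetic by hand (no Tensor or autodiff machinery; the 2x2 placement values are made up):

#include <cmath>
#include <cstdio>
#include <vector>

// time[i][j] = tanh(p[i][j]) * sum_k( p[i][k] / colsum[k] ), as in
// CalcTaskNodeTime above.
int main() {
  std::vector<std::vector<double>> p = {{1, 0}, {1, 1}};  // nodes x devices
  const std::size_t n = p.size(), m = p[0].size();
  std::vector<double> colsum(m, 0);  // per-device total placement mass
  for (std::size_t i = 0; i < n; ++i)
    for (std::size_t j = 0; j < m; ++j) colsum[j] += p[i][j];
  for (std::size_t i = 0; i < n; ++i) {
    double row_sum = 0;  // node i's summed share across devices
    for (std::size_t k = 0; k < m; ++k) row_sum += p[i][k] / colsum[k];
    for (std::size_t j = 0; j < m; ++j)
      std::printf("time[%zu][%zu] = %g\n", i, j, std::tanh(p[i][j]) * row_sum);
  }
  return 0;
}

For this placement, node 0's share is 1/2 while node 1's is 1/2 + 1 = 3/2, so node 1's entries dominate Max(time), the initiation interval minimized below.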
@@ -24,24 +42,14 @@ void AutoPlacementMemoryDemo() {
   }
 
   Tensor x = Add(Square((FixedExpectation(Update(&var, lr), 1))), epsilon);
-  const auto& x_copies = Clone(x, 4);
-  Tensor row = MatrixRowSum(x_copies.at(0));
-  Tensor col = MatrixColSum(x_copies.at(1));
-  Tensor load =
-      ElemWiseMul(x_copies.at(2), TensorProduct(row_ones, Reciprocal(col)));
-  Tensor time = ElemWiseMul(TensorProduct(row, col_ones), load);
+  const auto& x_copies = Clone(x, 2);
+  Tensor time = CalcTaskNodeTime(x_copies.at(0));
   Tensor ii = Max(time);
-  Backward(Add(ii, AvgAbsDeviation(MatrixColMax(x_copies.at(3)))));
+  Backward(Add(ii, AvgAbsDeviation(MatrixColMax(x_copies.at(1)))));
 
   std::cout << "x: ";
   for (double i : x.buffer().data()) { std::cout << i << " "; }
   std::cout << std::endl;
-  std::cout << "row: ";
-  for (double i : row.buffer().data()) { std::cout << i << " "; }
-  std::cout << std::endl;
-  std::cout << "col: ";
-  for (double i : col.buffer().data()) { std::cout << i << " "; }
-  std::cout << std::endl;
   std::cout << "time: ";
   for (double i : time.buffer().data()) { std::cout << i << " "; }
   std::cout << std::endl << std::endl;
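The objective keeps its shape: minimize ii, the bottleneck task time, plus AvgAbsDeviation(MatrixColMax(x)), which evens out per-device peaks. The clone count of x drops from 4 to 2 because each use of x consumes one clone and CalcTaskNodeTime now does its own cloning internally. A small sketch of the balancing term, assuming AvgAbsDeviation is the mean absolute deviation from the mean:

#include <cmath>
#include <cstdio>
#include <vector>

// Assumed semantics: mean of |x - mean(x)|; 0 exactly when all entries agree.
double AvgAbsDeviation(const std::vector<double>& v) {
  double avg = 0;
  for (double x : v) avg += x;
  avg /= v.size();
  double dev = 0;
  for (double x : v) dev += std::fabs(x - avg);
  return dev / v.size();
}

int main() {
  std::printf("%g\n", AvgAbsDeviation({0.5, 0.5, 0.5, 0.5}));  // balanced: 0
  std::printf("%g\n", AvgAbsDeviation({1.0, 0.0, 0.5, 0.5}));  // skewed: 0.25
  return 0;
}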
@@ -4,20 +4,26 @@ namespace oneflow {
 
 namespace df {
 
-Tensor IndexReduce(const Tensor& input,
-                   const std::vector<std::vector<int64_t>>& reduce_indexes) {
-  int64_t size = reduce_indexes.size();
-  std::shared_ptr<Buffer> out(new Buffer(Shape({size}), 0));
-  FOR_RANGE(int, i, 0, out->Size()) {
-    for (int64_t index : reduce_indexes.at(i)) {
-      out->At(i) += input.At(index);
+Tensor ColIndexReduce(const Tensor& input,
+                      const std::vector<std::vector<int64_t>>& reduce_indexes) {
+  CHECK(input.shape().dim_vec().size() == 2);
+  auto shape =
+      Shape({input.shape().At(0), static_cast<int64_t>(reduce_indexes.size())});
+  std::shared_ptr<Buffer> out(new Buffer(shape, 0));
+  FOR_RANGE(int, i, 0, out->shape().At(0)) {
+    FOR_RANGE(int, j, 0, out->shape().At(1)) {
+      for (int64_t index : reduce_indexes.at(j)) {
+        out->At(i, j) += input.At(i, index);
+      }
     }
   }
   return Tensor(out, [=](const Buffer& out_diff) {
     Buffer input_diff(input.shape(), 0);
-    FOR_RANGE(int, i, 0, out_diff.Size()) {
-      for (int64_t index : reduce_indexes.at(i)) {
-        input_diff.At(index) += out_diff.At(i);
+    FOR_RANGE(int, i, 0, out_diff.shape().At(0)) {
+      FOR_RANGE(int, j, 0, out_diff.shape().At(1)) {
+        for (int64_t index : reduce_indexes.at(j)) {
+          input_diff.At(i, index) += out_diff.At(i, j);
+        }
       }
     }
     input.HandleDiff(input_diff);
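A worked example of the new forward pass on plain arrays (no autodiff): column j of the output sums the input columns listed in reduce_indexes[j], row by row.

#include <cstdio>
#include <vector>

// Plain-array mirror of the ColIndexReduce forward pass above.
std::vector<std::vector<double>> ColIndexReduce(
    const std::vector<std::vector<double>>& in,
    const std::vector<std::vector<int>>& reduce_indexes) {
  std::vector<std::vector<double>> out(
      in.size(), std::vector<double>(reduce_indexes.size(), 0));
  for (std::size_t i = 0; i < in.size(); ++i)
    for (std::size_t j = 0; j < reduce_indexes.size(); ++j)
      for (int index : reduce_indexes[j]) out[i][j] += in[i][index];
  return out;
}

int main() {
  // 2x3 input; columns {0, 2} and {1} are reduced into a 2x2 output.
  auto out = ColIndexReduce({{1, 2, 3}, {4, 5, 6}}, {{0, 2}, {1}});
  std::printf("%g %g\n%g %g\n", out[0][0], out[0][1], out[1][0], out[1][1]);
  // Prints:
  // 4 2
  // 10 5
  return 0;
}

The backward pass in the diff simply routes each out_diff entry back to every input column that fed it, which is why input_diff.At(i, index) accumulates out_diff.At(i, j).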
@@ -7,8 +7,8 @@ namespace oneflow {
 namespace df {
 
-Tensor IndexReduce(const Tensor& input,
-                   const std::vector<std::vector<int64_t>>& reduce_indexes);
+Tensor ColIndexReduce(const Tensor& input,
+                      const std::vector<std::vector<int64_t>>& reduce_indexes);
 
 Tensor Update(Tensor* var, double lr);