Commit 0535db71 authored by Xinqi Li

a better formula to calculate task node time


Former-commit-id: 50cf37fd43c7c343da4f436c4226bd7229ef1a5d
Parent a49e738f
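What changed, as read from the diff below: the demo's inline row/col arithmetic is replaced by a dedicated CalcTaskNodeTime helper, DemoChainGraph gains dense per-regst lookups (RegstId2IsCloned, RegstIIRatio) in place of the old SplitedRegstIds/ClonedRegstIds id lists, and IndexReduce becomes a two-dimensional ColIndexReduce. A sketch of the new time formula, assuming P is the chain-node-by-device placement matrix (rows = chain nodes, columns = devices):

    time_{ij} = \tanh(P_{ij}) \cdot \sum_k ( P_{ik} / \sum_r P_{rk} )

Each column sum \sum_r P_{rk} is device k's total placement mass, P_{ik} / \sum_r P_{rk} is node i's share of device k, and the tanh factor gates the per-node row sum by how strongly node i sits on device j.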
@@ -283,22 +283,23 @@ DemoChainGraph::CalcChainRegstId2PathChainNodeIds(
   return ret;
 }
 
-std::vector<std::vector<int64_t>> DemoChainGraph::SplitedRegstIds() const {
-  std::vector<std::vector<int64_t>> ret;
+std::vector<double> DemoChainGraph::RegstId2IsCloned() const {
+  std::vector<double> ret(regsts_.size());
   for (const auto& regst : regsts_) {
-    if (!regst->IsRegstCloned()) {
-      ret.push_back(std::vector<int64_t>{regst->chain_regst_id()});
-    }
+    ret.at(regst->chain_regst_id()) = (regst->IsRegstCloned() ? 1 : 0);
   }
   return ret;
 }
 
-std::vector<std::vector<int64_t>> DemoChainGraph::ClonedRegstIds() const {
-  std::vector<std::vector<int64_t>> ret;
+std::vector<double> DemoChainGraph::RegstIIRatio(int piece_num_in_batch) const {
+  std::vector<double> ret(regsts_.size());
   for (const auto& regst : regsts_) {
-    if (regst->IsRegstCloned()) {
-      ret.push_back(std::vector<int64_t>{regst->chain_regst_id()});
+    double ii_ratio = 1;
+    if (regst->producer()->task_type() == TaskType::kMdDiffAcc
+        || regst->producer()->task_type() == TaskType::kMdUpdt) {
+      ii_ratio = piece_num_in_batch;
     }
+    ret.at(regst->chain_regst_id()) = ii_ratio;
   }
   return ret;
 }
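Both functions now return dense vectors indexed by chain_regst_id instead of lists of regst ids, presumably so downstream auto-placement code can consume them as tensors. A minimal standalone sketch of the two encodings (the TaskType enum and the two helpers here are hypothetical stand-ins for the real regst graph; the values mirror the simple_with_model test further down):

#include <cassert>
#include <vector>

// Hypothetical subset of the TaskType enum, just for this sketch.
enum class TaskType { kNormal, kMdDiffAcc, kMdUpdt };

// Mirror of RegstId2IsCloned(): 1.0 at cloned regst ids, 0.0 elsewhere.
std::vector<double> RegstId2IsCloned(const std::vector<bool>& is_cloned) {
  std::vector<double> ret(is_cloned.size());
  for (std::size_t id = 0; id < is_cloned.size(); ++id) {
    ret.at(id) = is_cloned[id] ? 1 : 0;
  }
  return ret;
}

// Mirror of RegstIIRatio(): regsts produced by kMdDiffAcc or kMdUpdt tasks
// get ratio piece_num_in_batch; every other regst keeps a ratio of 1.
double IIRatio(TaskType producer_type, int piece_num_in_batch) {
  return (producer_type == TaskType::kMdDiffAcc
          || producer_type == TaskType::kMdUpdt)
             ? piece_num_in_batch
             : 1;
}

int main() {
  // Regsts 1, 4, 5 cloned, as in expected_regst_id2is_cloned below.
  assert((RegstId2IsCloned({false, true, false, false, true, true, false})
          == std::vector<double>{0, 1, 0, 0, 1, 1, 0}));
  assert(IIRatio(TaskType::kMdUpdt, /*piece_num_in_batch=*/4) == 4);
  assert(IIRatio(TaskType::kNormal, /*piece_num_in_batch=*/4) == 1);
  return 0;
}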
@@ -144,8 +144,9 @@ class DemoChainGraph final : public Graph<DemoChainNode, DemoChainEdge> {
         [](int64_t) -> double { return 1; });
   }
 
-  std::vector<std::vector<int64_t>> SplitedRegstIds() const;
-  std::vector<std::vector<int64_t>> ClonedRegstIds() const;
+  std::vector<double> RegstId2IsCloned() const;
+  std::vector<double> RegstIIRatio(int piece_num_in_batch) const;
+
  private:
   friend class DemoChainGraphBuilder;
@@ -25,12 +25,8 @@ TEST(DemoChainGraph, simple_without_model) {
       {0, 2, 3, 5}, {1, 2, 3, 4}, {2, 3}, {2, 3}, {3, 4}, {3, 5}};
   ASSERT_TRUE(graph.CalcChainRegstId2PathChainNodeIds() == expected_path);
-  std::vector<std::vector<int64_t>> expected_splited_regst_ids{{0}, {1}, {2},
-                                                               {3}, {4}, {5}};
-  ASSERT_TRUE(graph.SplitedRegstIds() == expected_splited_regst_ids);
-  std::vector<std::vector<int64_t>> expected_cloned_regst_ids{};
-  ASSERT_TRUE(graph.ClonedRegstIds() == expected_cloned_regst_ids);
+  std::vector<double> expected_regst_id2is_cloned{0, 0, 0, 0, 0, 0};
+  ASSERT_TRUE(graph.RegstId2IsCloned() == expected_regst_id2is_cloned);
 }
 
 TEST(DemoChainGraph, simple_with_model) {
@@ -53,13 +49,8 @@ TEST(DemoChainGraph, simple_with_model) {
       {0, 1, 2, 5}, {4, 1, 2}, {1, 2}, {1, 2}, {2, 3}, {3, 4}, {2, 5}};
   ASSERT_TRUE(graph.CalcChainRegstId2PathChainNodeIds() == expected_path);
-  std::vector<std::vector<int64_t>> expected_splited_regst_ids{
-      {0}, {2}, {3}, {6}};
-  ASSERT_TRUE(graph.SplitedRegstIds() == expected_splited_regst_ids);
-  std::vector<std::vector<int64_t>> expected_cloned_regst_ids{{1}, {4}, {5}};
-  ASSERT_TRUE(graph.ClonedRegstIds() == expected_cloned_regst_ids);
+  std::vector<double> expected_regst_id2is_cloned{0, 1, 0, 0, 1, 1, 0};
+  ASSERT_TRUE(graph.RegstId2IsCloned() == expected_regst_id2is_cloned);
 }
 
 }  // namespace test
#include "oneflow/core/auto_placement/df_func.h"
#include "oneflow/core/auto_placement/demo_chain_graph.h"
namespace oneflow {
@@ -6,8 +7,25 @@ namespace df {
 
 namespace {
 
+Tensor CalcTaskNodeTime(const Tensor& chain_node_placement) {
+  Tensor row_ones(Shape({chain_node_placement.shape().At(0)}), 1);
+  Tensor col_ones(Shape({chain_node_placement.shape().At(1)}), 1);
+  auto placement_copies = Clone(chain_node_placement, 3);
+  Tensor col_sum =
+      TensorProduct(row_ones, MatrixColSum(placement_copies.at(0)));
+  Tensor workload = ElemWiseMul(placement_copies.at(1), Reciprocal(col_sum));
+  Tensor row_sum = TensorProduct(MatrixRowSum(workload), col_ones);
+  return ElemWiseMul(Tanh(placement_copies.at(2)), row_sum);
+}
+
+Tensor CalcMemoryII(const Tensor& chain_node_placement,
+                    const DemoChainGraph& chain_graph) {
+  TODO();
+  return Tensor(0);
+}
+
 void AutoPlacementMemoryDemo() {
-  Tensor var(Shape({4, 4}), [](size_t index) { return index % 2 ? 0 : 100; });
+  Tensor var(Shape({4, 4}), [](size_t index) { return index % 2 ? 0 : 1; });
   Tensor row_ones(Shape({var.shape().At(0)}), 1);
   Tensor col_ones(Shape({var.shape().At(1)}), 1);
   Tensor epsilon(0.000000001);
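A plain-array rendering of CalcTaskNodeTime for checking the arithmetic by hand (no Tensor or autodiff machinery; the 2x2 placement values are made up):

#include <cmath>
#include <cstdio>
#include <vector>

// time[i][j] = tanh(p[i][j]) * sum_k( p[i][k] / colsum[k] ), as in
// CalcTaskNodeTime above.
int main() {
  std::vector<std::vector<double>> p = {{1, 0}, {1, 1}};  // nodes x devices
  const std::size_t n = p.size(), m = p[0].size();
  std::vector<double> colsum(m, 0);  // per-device total placement mass
  for (std::size_t i = 0; i < n; ++i)
    for (std::size_t j = 0; j < m; ++j) colsum[j] += p[i][j];
  for (std::size_t i = 0; i < n; ++i) {
    double row_sum = 0;  // node i's summed share across devices
    for (std::size_t k = 0; k < m; ++k) row_sum += p[i][k] / colsum[k];
    for (std::size_t j = 0; j < m; ++j)
      std::printf("time[%zu][%zu] = %g\n", i, j, std::tanh(p[i][j]) * row_sum);
  }
  return 0;
}

For this placement, node 0's share is 1/2 while node 1's is 1/2 + 1 = 3/2, so node 1's entries dominate Max(time), the initiation interval minimized below.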
@@ -24,24 +42,14 @@ void AutoPlacementMemoryDemo() {
   }
 
   Tensor x = Add(Square((FixedExpectation(Update(&var, lr), 1))), epsilon);
-  const auto& x_copies = Clone(x, 4);
-  Tensor row = MatrixRowSum(x_copies.at(0));
-  Tensor col = MatrixColSum(x_copies.at(1));
-  Tensor load =
-      ElemWiseMul(x_copies.at(2), TensorProduct(row_ones, Reciprocal(col)));
-  Tensor time = ElemWiseMul(TensorProduct(row, col_ones), load);
+  const auto& x_copies = Clone(x, 2);
+  Tensor time = CalcTaskNodeTime(x_copies.at(0));
   Tensor ii = Max(time);
-  Backward(Add(ii, AvgAbsDeviation(MatrixColMax(x_copies.at(3)))));
+  Backward(Add(ii, AvgAbsDeviation(MatrixColMax(x_copies.at(1)))));
 
   std::cout << "x: ";
   for (double i : x.buffer().data()) { std::cout << i << " "; }
   std::cout << std::endl;
-  std::cout << "row: ";
-  for (double i : row.buffer().data()) { std::cout << i << " "; }
-  std::cout << std::endl;
-  std::cout << "col: ";
-  for (double i : col.buffer().data()) { std::cout << i << " "; }
-  std::cout << std::endl;
   std::cout << "time: ";
   for (double i : time.buffer().data()) { std::cout << i << " "; }
   std::cout << std::endl << std::endl;
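The objective keeps its shape: minimize ii, the bottleneck task time, plus AvgAbsDeviation(MatrixColMax(x)), which evens out per-device peaks. The clone count of x drops from 4 to 2 because each use of x consumes one clone and CalcTaskNodeTime now does its own cloning internally. A small sketch of the balancing term, assuming AvgAbsDeviation is the mean absolute deviation from the mean:

#include <cmath>
#include <cstdio>
#include <vector>

// Assumed semantics: mean of |x - mean(x)|; 0 exactly when all entries agree.
double AvgAbsDeviation(const std::vector<double>& v) {
  double avg = 0;
  for (double x : v) avg += x;
  avg /= v.size();
  double dev = 0;
  for (double x : v) dev += std::fabs(x - avg);
  return dev / v.size();
}

int main() {
  std::printf("%g\n", AvgAbsDeviation({0.5, 0.5, 0.5, 0.5}));  // balanced: 0
  std::printf("%g\n", AvgAbsDeviation({1.0, 0.0, 0.5, 0.5}));  // skewed: 0.25
  return 0;
}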
@@ -4,20 +4,26 @@ namespace oneflow {
 
 namespace df {
 
-Tensor IndexReduce(const Tensor& input,
-                   const std::vector<std::vector<int64_t>>& reduce_indexes) {
-  int64_t size = reduce_indexes.size();
-  std::shared_ptr<Buffer> out(new Buffer(Shape({size}), 0));
-  FOR_RANGE(int, i, 0, out->Size()) {
-    for (int64_t index : reduce_indexes.at(i)) {
-      out->At(i) += input.At(index);
+Tensor ColIndexReduce(const Tensor& input,
+                      const std::vector<std::vector<int64_t>>& reduce_indexes) {
+  CHECK(input.shape().dim_vec().size() == 2);
+  auto shape =
+      Shape({input.shape().At(0), static_cast<int64_t>(reduce_indexes.size())});
+  std::shared_ptr<Buffer> out(new Buffer(shape, 0));
+  FOR_RANGE(int, i, 0, out->shape().At(0)) {
+    FOR_RANGE(int, j, 0, out->shape().At(1)) {
+      for (int64_t index : reduce_indexes.at(j)) {
+        out->At(i, j) += input.At(i, index);
+      }
     }
   }
   return Tensor(out, [=](const Buffer& out_diff) {
     Buffer input_diff(input.shape(), 0);
-    FOR_RANGE(int, i, 0, out_diff.Size()) {
-      for (int64_t index : reduce_indexes.at(i)) {
-        input_diff.At(index) += out_diff.At(i);
+    FOR_RANGE(int, i, 0, out_diff.shape().At(0)) {
+      FOR_RANGE(int, j, 0, out_diff.shape().At(1)) {
+        for (int64_t index : reduce_indexes.at(j)) {
+          input_diff.At(i, index) += out_diff.At(i, j);
+        }
       }
     }
     input.HandleDiff(input_diff);
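A worked example of the new forward pass on plain arrays (no autodiff): column j of the output sums the input columns listed in reduce_indexes[j], row by row.

#include <cstdio>
#include <vector>

// Plain-array mirror of the ColIndexReduce forward pass above.
std::vector<std::vector<double>> ColIndexReduce(
    const std::vector<std::vector<double>>& in,
    const std::vector<std::vector<int>>& reduce_indexes) {
  std::vector<std::vector<double>> out(
      in.size(), std::vector<double>(reduce_indexes.size(), 0));
  for (std::size_t i = 0; i < in.size(); ++i)
    for (std::size_t j = 0; j < reduce_indexes.size(); ++j)
      for (int index : reduce_indexes[j]) out[i][j] += in[i][index];
  return out;
}

int main() {
  // 2x3 input; columns {0, 2} and {1} are reduced into a 2x2 output.
  auto out = ColIndexReduce({{1, 2, 3}, {4, 5, 6}}, {{0, 2}, {1}});
  std::printf("%g %g\n%g %g\n", out[0][0], out[0][1], out[1][0], out[1][1]);
  // Prints:
  // 4 2
  // 10 5
  return 0;
}

The backward pass in the diff simply routes each out_diff entry back to every input column that fed it, which is why input_diff.At(i, index) accumulates out_diff.At(i, j).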
@@ -7,8 +7,8 @@ namespace oneflow {
 namespace df {
 
-Tensor IndexReduce(const Tensor& input,
-                   const std::vector<std::vector<int64_t>>& reduce_indexes);
+Tensor ColIndexReduce(const Tensor& input,
+                      const std::vector<std::vector<int64_t>>& reduce_indexes);
 
 Tensor Update(Tensor* var, double lr);