提交 d62aa80e 编写于 作者: X Xinqi Li

grouper and placer


Former-commit-id: 299f161baaa4ab42f8a3a307b055c7c8f590080a
上级 fe93c989
#include <cmath>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include "oneflow/core/auto_placement/df_func.h"
#include "oneflow/core/auto_placement/demo_chain_graph.h"
......@@ -9,14 +11,11 @@ namespace {
// Broadcasts each chain node's placement-column sum across every device row.
// NOTE(review): result presumably has shape (device_num, chain_node_num), with
// entry (d, c) equal to the column sum of chain node c — confirm against
// TensorProduct's semantics.
// (The merged diff left an unreachable duplicate return here; this keeps the
// post-commit version only.)
Tensor CalcTaskNodeComputeTime(const Tensor& chain_node_placement) {
  // One entry per device row, used to replicate the column sums row-wise.
  Tensor row_ones(Shape({chain_node_placement.shape().At(0)}), 1);
  return TensorProduct(row_ones, MatrixColSum(chain_node_placement));
}
// Aggregates a (device x chain-node) probability matrix into one value per
// device by summing each row.
// (The merged diff interleaved the old and new definitions; this keeps the
// post-commit version, which takes the probability matrix directly.)
Tensor CalcDeviceComputeTime(const Tensor& prob_matrix) {
  return MatrixRowSum(prob_matrix);
}
Tensor CalcTaskNodeTime(const Tensor& chain_node_placement) {
......@@ -25,13 +24,14 @@ Tensor CalcTaskNodeTime(const Tensor& chain_node_placement) {
auto compute_time_copies = Clone(compute_time, 2);
Tensor row_sum =
TensorProduct(MatrixRowSum(compute_time_copies.at(0)), col_ones);
return Mul(Tensor(0.5), Add(row_sum, compute_time_copies.at(1)));
return Mul(Tensor(0.5), ADD(row_sum, compute_time_copies.at(1)));
}
Tensor CalcRegstDuration(const Tensor& chain_node_placement,
const DemoChainGraph& chain_graph) {
Tensor task_node_time = CalcTaskNodeTime(chain_node_placement);
Tensor chain_node_time = MatrixColMax(task_node_time);
Tensor chain_node_time =
MatrixColMax(Tensor(task_node_time, [](const Buffer&) {}));
auto GetTime = [chain_node_time](int64_t chain_node_id) -> double {
return chain_node_time.At(chain_node_id);
};
......@@ -51,8 +51,10 @@ Tensor CalcRegstMemory(const Tensor& chain_node_placement,
Tensor clone_workload_ratio = Tanh(copies.at(2));
Tensor clone_weight = TensorProduct(
row_ones, Tensor(Shape({regst_num}), chain_graph.RegstId2IsCloned()));
return Add(ElemWiseMul(clone_workload_ratio, clone_weight),
ElemWiseMul(split_workload_ratio, Sub(Tensor(1), clone_weight)));
auto clone_weight_copies = Clone(clone_weight, 2);
return ADD(ElemWiseMul(clone_workload_ratio, clone_weight_copies.at(0)),
ElemWiseMul(split_workload_ratio,
Sub(Tensor(1), clone_weight_copies.at(1))));
}
Tensor CalcIIRatio(const Tensor& chain_node_placement,
......@@ -67,62 +69,90 @@ Tensor CalcIIRatio(const Tensor& chain_node_placement,
// Estimates the memory-bound initiation interval (II) per device:
// weighted register residency time divided by the memory left on the device.
// (The merged diff interleaved old and new statements three times in this
// body; this keeps the post-commit version only.)
//
// @param chain_node_placement  (device x chain-node) placement matrix.
// @param chain_graph           graph supplying register/duration metadata.
// @param piece_num_in_batch    pipeline depth used by CalcIIRatio.
// @param mem_size_per_device   memory budget per device.
Tensor CalcDeviceMemII(const Tensor& chain_node_placement,
                       const DemoChainGraph& chain_graph,
                       int piece_num_in_batch, double mem_size_per_device) {
  // Tensors feed the autodiff graph once each, so clone before the two uses.
  auto placement_copies = Clone(chain_node_placement, 2);
  Tensor regst_mem = CalcRegstMemory(placement_copies.at(0), chain_graph);
  Tensor regst_duration =
      CalcRegstDuration(placement_copies.at(1), chain_graph);
  Tensor ii_ratio =
      CalcIIRatio(chain_node_placement, chain_graph, piece_num_in_batch);
  auto ii_ratio_copies = Clone(ii_ratio, 2);
  auto regst_mem_copies = Clone(regst_mem, 2);
  // Memory-time product: how long each register's memory stays occupied.
  Tensor weighted_mem_time =
      ElemWiseMul(ElemWiseMul(ii_ratio_copies.at(0), regst_duration),
                  regst_mem_copies.at(0));
  // (1.5 - ii_ratio) weighting — presumably a ceiling correction on the
  // per-register footprint; TODO confirm intent with the author.
  Tensor weighted_mem_ceil_diff = ElemWiseMul(
      Sub(Tensor(1.5), ii_ratio_copies.at(1)), regst_mem_copies.at(1));
  Tensor device_mem_time = MatrixRowSum(weighted_mem_time);
  Tensor device_mem =
      Sub(Tensor(mem_size_per_device), MatrixRowSum(weighted_mem_ceil_diff));
  int64_t dev_num = chain_node_placement.shape().At(0);
  Tensor row_ones(Shape({dev_num}), 1);
  // Per-device epsilon vector keeps the divisor strictly positive.
  Tensor epsilon = Reshape(TensorProduct(row_ones, Tensor(0.000000000001)),
                           Shape({dev_num}));
  Tensor cliped_device_mem = Max(device_mem, epsilon);
  return ElemWiseDiv(device_mem_time, cliped_device_mem);
}
// Turns a raw variable into a column-stochastic probability matrix:
// one SGD step, squaring for non-negativity, a small epsilon to avoid
// zero columns, then per-column normalization so each column sums to 1.
Tensor ProbabilityMatrix(Tensor* var, double lr) {
  Tensor ones_per_row(Shape({var->shape().At(0)}), 1);
  Tensor eps(0.000000001);
  Tensor positive = ADD(Square(FixedExpectation(Update(var, lr), 1)), eps);
  auto positive_copies = Clone(positive, 2);
  Tensor col_sums =
      TensorProduct(ones_per_row, MatrixColSum(positive_copies.at(0)));
  return ElemWiseDiv(positive_copies.at(1), col_sums);
}
// Demo: learns a device placement for a linear 12-op chain
// ("op0"..."op10" feeding "loss") by gradient descent, minimizing the
// busiest device's load plus a sparsity penalty that pushes each
// chain-node column toward a one-hot device choice.
// (The merged diff interleaved two generations of this demo; this keeps
// the post-commit version, drops dead commented-out debug code and the
// unused floor_tensor local, and un-shadows the loop counters.)
void AutoPlacementMemoryDemo() {
  std::random_device rd{};
  std::mt19937 gen{rd()};
  // Initialize near 1 with small jitter: near-uniform but symmetry-broken.
  std::normal_distribution<double> distr(1, 0.1);
  DemoChainGraph chain_graph([](DemoChainGraphBuilder* builder) {
    auto regst = builder->ModelOp("op0");
    FOR_RANGE(int, i, 1, 11) {
      regst = builder->ModelOp("op" + std::to_string(i), {regst});
    }
    builder->Backward(builder->ModelOp("loss", {regst}));
  });
  auto chain_node2fw_id = chain_graph.CalcChainNodeId2FwChainNodeId();
  int64_t fw_node_num = chain_graph.FwChainNodeNum();
  Shape shape({6, fw_node_num});  // 6 devices x forward chain nodes
  Tensor fw_var(shape, [&](size_t index) { return distr(gen); });
  FOR_RANGE(int, i, 0, 5000) {
    double lr = 0.01;
    // Column-stochastic placement probabilities for the forward nodes.
    Tensor fw_prob = ProbabilityMatrix(&fw_var, lr);
    // Expand forward-node columns to all chain nodes (fw/bw/model etc.).
    Tensor chain_node_prob = ColIndexReduce(fw_prob, chain_node2fw_id);
    auto chain_prob_copies = Clone(chain_node_prob, 2);
    Tensor computation_ii = MatrixRowSum(chain_prob_copies.at(0));
    // The initiation interval is bounded by the busiest device.
    Tensor ii = MaxElem(computation_ii);
    // sum(sqrt(p)) is minimized by one-hot columns, so this rewards
    // committing each node to a single device.
    Tensor penalty = Mul(Sum(Sqrt(chain_prob_copies.at(1))), Tensor(1));
    BackwardRun(ADD(ii, penalty));
    std::cout << "fw_prob: " << std::endl;
    FOR_RANGE(int, col, 0, fw_prob.shape().At(1)) {
      FOR_RANGE(int, row, 0, fw_prob.shape().At(0)) {
        std::cout << std::setprecision(3) << fw_prob.At(row, col) << "\t";
      }
      std::cout << std::endl;
    }
    std::cout << "computation_ii: ";
    for (double v : computation_ii.buffer().data()) { std::cout << v << " "; }
    std::cout << std::endl << std::endl;
  }
}
......@@ -134,14 +164,14 @@ void AutoPlacementComputationDemo() {
FOR_RANGE(int, i, 0, 10000) {
double lr = 0.001;
Tensor x = Add(Square(FixedExpectation(Update(&var, lr), 1)), epsilon);
Tensor x = ADD(Square(FixedExpectation(Update(&var, lr), 1)), epsilon);
const auto& x_copies = Clone(x, 4);
Tensor row = MatrixRowSum(x_copies.at(0));
Tensor col = MatrixColSum(x_copies.at(1));
Tensor load = ElemWiseDiv(x_copies.at(2), TensorProduct(row_ones, col));
Tensor table = ElemWiseMul(TensorProduct(row, col_ones), load);
Tensor ii = MaxElem(table);
Backward(Add(ii, Variance(MatrixColMax(x_copies.at(3)))));
BackwardRun(ADD(ii, Variance(MatrixColMax(x_copies.at(3)))));
std::cout << "x: ";
for (double i : x.buffer().data()) { std::cout << i << " "; }
......@@ -155,11 +185,6 @@ void AutoPlacementComputationDemo() {
std::cout << "table: ";
for (double i : table.buffer().data()) { std::cout << i << " "; }
std::cout << std::endl << std::endl;
// Backward(Variance(MatrixColMax(Update(&var, lr))));
// std::cout << "var: ";
// for (double i : var.buffer().data()) { std::cout << i << " "; }
// std::cout << std::endl;
}
}
......
......@@ -2,86 +2,134 @@
#define ONEFLOW_CORE_AUTO_PLACEMNENT_DF_FUNC_H_
#include "oneflow/core/auto_placement/tensor.h"
#include "oneflow/core/common/preprocessor.h"
namespace oneflow {
namespace df {
Tensor ColIndexReduce(const Tensor& input,
const std::vector<std::vector<int64_t>>& reduce_indexes);
Tensor _Update(const std::string& caller, Tensor* var, double lr);
#define Update(...) _Update(__LOC__, __VA_ARGS__)
Tensor IndexReduce(const Tensor& input,
const std::vector<std::vector<int64_t>>& reduce_indexes);
Tensor _DiffWatch(const std::string& caller, const Tensor& input,
const std::function<void(const Buffer& out_diff)>& Handler);
#define DiffWatch(...) _DiffWatch(__LOC__, __VA_ARGS__)
Tensor Update(Tensor* var, double lr);
#define __LOC__ __FILE__ ":" OF_PP_STRINGIZE(__LINE__)
std::vector<Tensor> Clone(const Tensor& input, size_t n);
Tensor _ColIndexReduce(const std::string& caller, const Tensor& input,
const std::vector<std::vector<int64_t>>& reduce_indexes);
#define ColIndexReduce(...) _ColIndexReduce(__LOC__, __VA_ARGS__)
Tensor Reshape(const Tensor& input, const Shape& shape);
Tensor _IndexReduce(const std::string& caller, const Tensor& input,
const std::vector<std::vector<int64_t>>& reduce_indexes);
#define IndexReduce(...) _IndexReduce(__LOC__, __VA_ARGS__)
Tensor Minus(const Tensor& input);
std::vector<Tensor> _Clone(const std::string& caller, const Tensor& input,
size_t n);
#define Clone(...) _Clone(__LOC__, __VA_ARGS__)
Tensor Abs(const Tensor& input);
Tensor _Reshape(const std::string& caller, const Tensor& input,
const Shape& shape);
#define Reshape(...) _Reshape(__LOC__, __VA_ARGS__)
Tensor Exp(const Tensor& input);
Tensor _Minus(const std::string& caller, const Tensor& input);
#define Minus(...) _Minus(__LOC__, __VA_ARGS__)
Tensor Tanh(const Tensor& input);
Tensor _Abs(const std::string& caller, const Tensor& input);
#define Abs(...) _Abs(__LOC__, __VA_ARGS__)
Tensor _Exp(const std::string& caller, const Tensor& input);
#define Exp(...) _Exp(__LOC__, __VA_ARGS__)
Tensor _Tanh(const std::string& caller, const Tensor& input);
#define Tanh(...) _Tanh(__LOC__, __VA_ARGS__)
Tensor Tee(const Tensor& input, Tensor* out);
Tensor Add(const Tensor& a, const Tensor& b);
Tensor _Add(const std::string& caller, const Tensor& a, const Tensor& b);
#define ADD(...) _Add(__LOC__, __VA_ARGS__)
Tensor Sub(const Tensor& a, const Tensor& b);
Tensor _Sub(const std::string& caller, const Tensor& a, const Tensor& b);
#define Sub(...) _Sub(__LOC__, __VA_ARGS__)
Tensor ElemWiseMul(const Tensor& a, const Tensor& b);
Tensor _ElemWiseMul(const std::string& caller, const Tensor& a,
const Tensor& b);
#define ElemWiseMul(...) _ElemWiseMul(__LOC__, __VA_ARGS__)
Tensor ElemWiseDiv(const Tensor& a, const Tensor& b);
Tensor _ElemWiseDiv(const std::string& caller, const Tensor& a,
const Tensor& b);
#define ElemWiseDiv(...) _ElemWiseDiv(__LOC__, __VA_ARGS__)
Tensor Mul(const Tensor& a, const Tensor& b);
Tensor _Mul(const std::string& caller, const Tensor& a, const Tensor& b);
#define Mul(...) _Mul(__LOC__, __VA_ARGS__)
Tensor Reciprocal(const Tensor& input);
Tensor _Reciprocal(const std::string& caller, const Tensor& input);
#define Reciprocal(...) _Reciprocal(__LOC__, __VA_ARGS__)
Tensor Max(const Tensor& a, const Tensor& b);
Tensor _Max(const std::string& caller, const Tensor& a, const Tensor& b);
#define Max(...) _Max(__LOC__, __VA_ARGS__)
Tensor Min(const Tensor& a, const Tensor& b);
Tensor _Min(const std::string& caller, const Tensor& a, const Tensor& b);
#define Min(...) _Min(__LOC__, __VA_ARGS__)
Tensor MaxElem(const Tensor& a);
Tensor _MaxElem(const std::string& caller, const Tensor& a);
#define MaxElem(...) _MaxElem(__LOC__, __VA_ARGS__)
Tensor Relu(const Tensor& input);
Tensor _Relu(const std::string& caller, const Tensor& input);
#define Relu(...) _Relu(__LOC__, __VA_ARGS__)
Tensor MinElem(const Tensor& a);
Tensor _MinElem(const std::string& caller, const Tensor& a);
#define MinElem(...) _MinElem(__LOC__, __VA_ARGS__)
Tensor Sum(const Tensor& a);
Tensor _Sum(const std::string& caller, const Tensor& a);
#define Sum(...) _Sum(__LOC__, __VA_ARGS__)
Tensor Avg(const Tensor& a);
Tensor _Avg(const std::string& caller, const Tensor& a);
#define Avg(...) _Avg(__LOC__, __VA_ARGS__)
Tensor Variance(const Tensor& a);
Tensor _Variance(const std::string& caller, const Tensor& a);
#define Variance(...) _Variance(__LOC__, __VA_ARGS__)
Tensor StandardDeviation(const Tensor& a);
Tensor _StandardDeviation(const std::string& caller, const Tensor& a);
#define StandardDeviation(...) _StandardDeviation(__LOC__, __VA_ARGS__)
Tensor AvgAbsDeviation(const Tensor& a);
Tensor _AvgAbsDeviation(const std::string& caller, const Tensor& a);
#define AvgAbsDeviation(...) _AvgAbsDeviation(__LOC__, __VA_ARGS__)
Tensor GeAvg(const Tensor& input);
Tensor _DoubleVariance(const std::string& caller, const Tensor& input);
#define DoubleVariance(...) _DoubleVariance(__LOC__, __VA_ARGS__)
Tensor LeAvg(const Tensor& input);
Tensor _DoubleAvgAbsDeviation(const std::string& caller, const Tensor& input);
#define DoubleAvgAbsDeviation(...) _DoubleAvgAbsDeviation(__LOC__, __VA_ARGS__)
Tensor DoubleVariance(const Tensor& input);
Tensor _Square(const std::string& caller, const Tensor& input);
#define Square(...) _Square(__LOC__, __VA_ARGS__)
Tensor Square(const Tensor& input);
Tensor _Sqrt(const std::string& caller, const Tensor& input);
#define Sqrt(...) _Sqrt(__LOC__, __VA_ARGS__)
Tensor Sqrt(const Tensor& input);
Tensor _MatrixRowSum(const std::string& caller, const Tensor& input);
#define MatrixRowSum(...) _MatrixRowSum(__LOC__, __VA_ARGS__)
Tensor MatrixRowSum(const Tensor& input);
Tensor _MatrixColSum(const std::string& caller, const Tensor& input);
#define MatrixColSum(...) _MatrixColSum(__LOC__, __VA_ARGS__)
Tensor MatrixColSum(const Tensor& input);
Tensor _MatrixColMax(const std::string& caller, const Tensor& input);
#define MatrixColMax(...) _MatrixColMax(__LOC__, __VA_ARGS__)
Tensor MatrixColMax(const Tensor& input);
Tensor _TensorProduct(const std::string& caller, const Tensor& a,
const Tensor& b);
#define TensorProduct(...) _TensorProduct(__LOC__, __VA_ARGS__)
Tensor TensorProduct(const Tensor& a, const Tensor& b);
Tensor _FixedExpectation(const std::string& caller, const Tensor& a, double e);
#define FixedExpectation(...) _FixedExpectation(__LOC__, __VA_ARGS__)
Tensor FixedExpectation(const Tensor& a, double e);
Tensor _FixedMaxVal(const std::string& caller, const Tensor& a, double e);
#define FixedMaxVal(...) _FixedMaxVal(__LOC__, __VA_ARGS__)
Tensor Backward(const Tensor& loss);
Tensor _Backward(const std::string& caller, const Tensor& loss);
#define BackwardRun(...) _Backward(__LOC__, __VA_ARGS__)
} // namespace df
......
......@@ -25,7 +25,7 @@ class Tensor final {
Tensor(const Shape& shape, const std::vector<double>& data)
: buffer_(std::shared_ptr<Buffer>(new Buffer(shape, data))),
diff_handler_([](const Buffer&) {}) {}
Tensor(std::shared_ptr<Buffer> buffer,
Tensor(const std::shared_ptr<Buffer>& buffer,
const std::function<void(const Buffer&)>& diff_handler)
: buffer_(buffer), diff_handler_(diff_handler) {}
Tensor(Tensor tensor, const std::function<void(const Buffer&)>& diff_handler)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册