Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
a5571351
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 接近 3 年
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a5571351
编写于
2月 27, 2018
作者:
X
Xinqi Li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ii calculated by device memory
Former-commit-id: 54f694906a09b8828b9dacecb38b45b92cdba123
上级
0535db71
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
108 additions
and
31 deletions
+108
-31
oneflow/core/auto_placement/demo_chain_graph.cpp
oneflow/core/auto_placement/demo_chain_graph.cpp
+7
-0
oneflow/core/auto_placement/demo_chain_graph.h
oneflow/core/auto_placement/demo_chain_graph.h
+5
-0
oneflow/core/auto_placement/df_demo.cpp
oneflow/core/auto_placement/df_demo.cpp
+84
-22
oneflow/core/auto_placement/df_func.cpp
oneflow/core/auto_placement/df_func.cpp
+5
-6
oneflow/core/auto_placement/df_func.h
oneflow/core/auto_placement/df_func.h
+3
-3
oneflow/core/auto_placement/tensor.h
oneflow/core/auto_placement/tensor.h
+4
-0
未找到文件。
oneflow/core/auto_placement/demo_chain_graph.cpp
浏览文件 @
a5571351
...
...
@@ -4,6 +4,13 @@ namespace oneflow {
namespace
df
{
// Builds an op named `name` whose input list is `inputs` extended with a
// newly created model regst ("model_" + name), then delegates to Op().
DemoChainRegst* DemoChainGraphBuilder::ModelOp(
    const std::string& name, const std::vector<DemoChainRegst*>& inputs) {
  std::vector<DemoChainRegst*> inputs_with_model(inputs.begin(), inputs.end());
  inputs_with_model.push_back(Model("model_" + name));
  return Op(name, inputs_with_model);
}
DemoChainRegst
*
DemoChainGraphBuilder
::
Op
(
const
std
::
string
&
name
,
const
std
::
vector
<
DemoChainRegst
*>&
inputs
)
{
DemoChainNode
*
fw_node
=
NewForwardNode
(
name
);
...
...
oneflow/core/auto_placement/demo_chain_graph.h
浏览文件 @
a5571351
...
...
@@ -183,6 +183,11 @@ class DemoChainGraphBuilder final {
// Convenience overload: an op with an empty input list.
DemoChainRegst* Op(const std::string& name) {
  std::vector<DemoChainRegst*> no_inputs;
  return Op(name, no_inputs);
}
DemoChainRegst
*
ModelOp
(
const
std
::
string
&
name
,
const
std
::
vector
<
DemoChainRegst
*>&
inputs
);
// Convenience overload: a model op with no extra inputs beyond its model regst.
DemoChainRegst* ModelOp(const std::string& name) {
  return ModelOp(name, std::vector<DemoChainRegst*>{});
}
 private:
  // Allocates the next chain-node id by pre-incrementing the counter stored
  // on the graph under construction; ids therefore start one above the
  // counter's initial value.
  int64_t NewChainNodeId() { return ++graph_->chain_node_id_; }
  // Allocates the next chain-regst id with the same pre-increment scheme.
  int64_t NewChainRegstId() { return ++graph_->chain_regst_id_; }
...
...
oneflow/core/auto_placement/df_demo.cpp
浏览文件 @
a5571351
...
...
@@ -13,45 +13,107 @@ Tensor CalcTaskNodeTime(const Tensor& chain_node_placement) {
auto
placement_copies
=
Clone
(
chain_node_placement
,
3
);
Tensor
col_sum
=
TensorProduct
(
row_ones
,
MatrixColSum
(
placement_copies
.
at
(
0
)));
Tensor
workload
=
ElemWise
Mul
(
placement_copies
.
at
(
1
),
Reciprocal
(
col_sum
)
);
Tensor
workload
=
ElemWise
Div
(
placement_copies
.
at
(
1
),
col_sum
);
Tensor
row_sum
=
TensorProduct
(
MatrixRowSum
(
workload
),
col_ones
);
return
ElemWiseMul
(
Tanh
(
placement_copies
.
at
(
2
)),
row_sum
);
}
Tensor
CalcMemoryII
(
const
Tensor
&
chain_node_placement
,
const
DemoChainGraph
&
chain_graph
)
{
TODO
();
return
Tensor
(
0
);
// Per-regst duration: gathers columns of the task-node-time matrix along each
// regst's path of chain nodes, where the path computation is driven by a
// lookup of each chain node's max task time.
Tensor CalcRegstDuration(const Tensor& chain_node_placement,
                         const DemoChainGraph& chain_graph) {
  Tensor per_task_time = CalcTaskNodeTime(chain_node_placement);
  Tensor per_node_time = MatrixColMax(per_task_time);
  // Captured by value so the lookup owns its data while the graph uses it.
  auto node_time_of = [per_node_time](int64_t node_id) -> double {
    return per_node_time.At(node_id);
  };
  auto regst2path = chain_graph.CalcChainRegstId2PathChainNodeIds(node_time_of);
  return ColIndexReduce(per_task_time, regst2path);
}
// Per-regst memory estimate. Each regst is mapped to its producer chain
// node's placement row, then its memory is blended between a "split"
// workload ratio (placement / column sum) and a "clone" ratio (tanh of
// placement), weighted by whether the regst is cloned.
// NOTE(review): the blend weights come from chain_graph.RegstId2IsCloned();
// presumably 1.0 for cloned regsts and 0.0 otherwise — confirm against
// DemoChainGraph.
Tensor CalcRegstMemory(const Tensor& chain_node_placement,
                       const DemoChainGraph& chain_graph) {
  auto regst2producer = chain_graph.CalcChainRegstId2ProducerChainNodeId();
  int64_t regst_num = regst2producer.size();
  // One column per regst, taken from the producer node's placement column.
  Tensor regst_placement = ColIndexReduce(chain_node_placement, regst2producer);
  Tensor row_ones(Shape({regst_placement.shape().At(0)}), 1);
  // Three clones so each use-site gets its own diff path through Clone().
  auto copies = Clone(regst_placement, 3);
  // Broadcast each column's sum back to the full matrix shape.
  Tensor col_sum = TensorProduct(row_ones, MatrixColSum(copies.at(0)));
  Tensor split_workload_ratio = ElemWiseDiv(copies.at(1), col_sum);
  Tensor clone_workload_ratio = Tanh(copies.at(2));
  Tensor clone_weight = TensorProduct(
      row_ones, Tensor(Shape({regst_num}), chain_graph.RegstId2IsCloned()));
  // weight * clone_ratio + (1 - weight) * split_ratio, element-wise.
  return Add(ElemWiseMul(clone_workload_ratio, clone_weight),
             ElemWiseMul(split_workload_ratio, Sub(Tensor(1), clone_weight)));
}
// Reciprocal of each regst's II ratio, broadcast to one row per device
// (i.e. per row of chain_node_placement).
Tensor CalcIIRatio(const Tensor& chain_node_placement,
                   const DemoChainGraph& chain_graph, int piece_num_in_batch) {
  auto ratios = chain_graph.RegstIIRatio(piece_num_in_batch);
  int64_t num_regsts = ratios.size();
  Tensor ratio_row(Shape({num_regsts}), ratios);
  Tensor ones_col(Shape({chain_node_placement.shape().At(0)}), 1);
  Tensor broadcast_ratios = TensorProduct(ones_col, ratio_row);
  return Reciprocal(broadcast_ratios);
}
// Memory-induced II per device: the row-sum of (ii_ratio * duration * memory)
// divided by the device memory left after subtracting a (1.5 - ii_ratio)
// weighted memory term, with the denominator clamped to a tiny epsilon so
// the division never sees zero or negative memory.
Tensor CalcDeviceMemII(const Tensor& chain_node_placement,
                       const DemoChainGraph& chain_graph,
                       int piece_num_in_batch, double mem_size_per_device) {
  Tensor mem_per_regst = CalcRegstMemory(chain_node_placement, chain_graph);
  Tensor duration_per_regst =
      CalcRegstDuration(chain_node_placement, chain_graph);
  Tensor ratio =
      CalcIIRatio(chain_node_placement, chain_graph, piece_num_in_batch);
  // Two clones: one per use of the regst-memory tensor below.
  auto mem_copies = Clone(mem_per_regst, 2);
  Tensor time_weighted_mem =
      ElemWiseMul(ElemWiseMul(ratio, duration_per_regst), mem_copies.at(0));
  Tensor ceil_weighted_mem =
      ElemWiseMul(Sub(Tensor(1.5), ratio), mem_copies.at(1));
  Tensor mem_time_per_device = MatrixRowSum(time_weighted_mem);
  Tensor free_mem_per_device =
      Sub(Tensor(mem_size_per_device), MatrixRowSum(ceil_weighted_mem));
  Tensor epsilon(0.000000000001);
  Tensor ones_col(Shape({chain_node_placement.shape().At(0)}), 1);
  Tensor clamped_free_mem =
      Max(free_mem_per_device, TensorProduct(ones_col, epsilon));
  return ElemWiseDiv(mem_time_per_device, clamped_free_mem);
}
void
AutoPlacementMemoryDemo
()
{
Tensor
var
(
Shape
({
4
,
4
}),
[](
size_t
index
)
{
return
index
%
2
?
0
:
1
;
});
Tensor
row_ones
(
Shape
({
var
.
shape
().
At
(
0
)}),
1
);
Tensor
col_ones
(
Shape
({
var
.
shape
().
At
(
1
)}),
1
);
Tensor
epsilon
(
0.000000001
);
FOR_RANGE
(
int
,
i
,
0
,
1000
)
{
DemoChainGraph
chain_graph
([](
DemoChainGraphBuilder
*
builder
)
{
builder
->
Backward
(
builder
->
Op
(
"loss"
,
{
builder
->
ModelOp
(
"op3"
,
{
builder
->
ModelOp
(
"op2"
,
{
builder
->
ModelOp
(
"op1"
,
{
builder
->
ModelOp
(
"op0"
)})})})}));
});
int64_t
fw_node_num
=
chain_graph
.
FwChainNodeNum
();
Tensor
fw_var
(
Shape
({
4
,
fw_node_num
}),
[](
size_t
index
)
{
return
index
%
2
?
0
:
1
;
});
Tensor
epsilon
(
0.000000000001
);
FOR_RANGE
(
int
,
i
,
0
,
10000
)
{
double
lr
=
1
;
if
(
i
<
400
)
{
if
(
i
<
400
0
)
{
lr
=
0.1
;
}
else
if
(
i
<
600
)
{
}
else
if
(
i
<
600
0
)
{
lr
=
0.01
;
}
else
if
(
i
<
800
)
{
}
else
if
(
i
<
800
0
)
{
lr
=
0.001
;
}
else
{
lr
=
0.0001
;
}
Tensor
x
=
Add
(
Square
((
FixedExpectation
(
Update
(
&
var
,
lr
),
1
))),
epsilon
);
const
auto
&
x_copies
=
Clone
(
x
,
2
);
Tensor
time
=
CalcTaskNodeTime
(
x_copies
.
at
(
0
));
Tensor
ii
=
Max
(
time
);
Backward
(
Add
(
ii
,
AvgAbsDeviation
(
MatrixColMax
(
x_copies
.
at
(
1
)))));
Tensor
x
=
Add
(
Square
((
FixedExpectation
(
Update
(
&
fw_var
,
lr
),
1
))),
epsilon
);
Tensor
chain_node_placement
=
ColIndexReduce
(
x
,
chain_graph
.
CalcChainNodeId2FwChainNodeId
());
const
auto
&
placement_copies
=
Clone
(
x
,
2
);
Tensor
computation_ii
=
CalcTaskNodeTime
(
placement_copies
.
at
(
0
));
// Tensor memory_ii =
// CalcDeviceMemII(placement_copies.at(1), chain_graph, 10, 100);
Tensor
ii
=
MaxElem
(
computation_ii
);
Backward
(
Add
(
ii
,
AvgAbsDeviation
(
MatrixColMax
(
placement_copies
.
at
(
1
)))));
std
::
cout
<<
"x: "
;
for
(
double
i
:
x
.
buffer
().
data
())
{
std
::
cout
<<
i
<<
" "
;
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"
time
: "
;
for
(
double
i
:
time
.
buffer
().
data
())
{
std
::
cout
<<
i
<<
" "
;
}
std
::
cout
<<
"
computation_ii
: "
;
for
(
double
i
:
computation_ii
.
buffer
().
data
())
{
std
::
cout
<<
i
<<
" "
;
}
std
::
cout
<<
std
::
endl
<<
std
::
endl
;
// Backward(Variance(MatrixColMax(Update(&var, lr))));
...
...
@@ -85,7 +147,7 @@ void AutoPlacementComputationDemo() {
Tensor
load
=
ElemWiseMul
(
x_copies
.
at
(
2
),
TensorProduct
(
row_ones
,
Reciprocal
(
col
)));
Tensor
table
=
ElemWiseMul
(
TensorProduct
(
row
,
col_ones
),
load
);
Tensor
ii
=
Max
(
table
);
Tensor
ii
=
Max
Elem
(
table
);
Backward
(
Add
(
ii
,
AvgAbsDeviation
(
MatrixColMax
(
x_copies
.
at
(
3
)))));
std
::
cout
<<
"x: "
;
...
...
oneflow/core/auto_placement/df_func.cpp
浏览文件 @
a5571351
...
...
@@ -202,7 +202,7 @@ Tensor FixedExpectation(const Tensor& input, double e) {
[
=
](
const
Buffer
&
out_diff
)
{
input
.
HandleDiff
(
out_diff
);
});
}
Tensor
Max
(
const
Tensor
&
input
)
{
Tensor
Max
Elem
(
const
Tensor
&
input
)
{
double
max_value
=
std
::
numeric_limits
<
double
>::
min
();
size_t
max_index
=
0
;
FOR_RANGE
(
int
,
i
,
0
,
input
.
Size
())
{
...
...
@@ -246,11 +246,6 @@ Tensor AvgAbsDeviation(const Tensor& input) {
return
Avg
(
Abs
(
Sub
(
copies
.
at
(
0
),
Avg
(
copies
.
at
(
1
)))));
}
// Spread of the input: max element minus min element (two clones so each
// reduction gets its own diff path).
Tensor MaxDeviation(const Tensor& input) {
  auto pair = Clone(input, 2);
  Tensor hi = Max(pair.at(0));
  Tensor lo = Min(pair.at(1));
  return Sub(hi, lo);
}
Tensor
Sum
(
const
Tensor
&
input
)
{
double
sum
=
0
;
FOR_RANGE
(
int
,
i
,
0
,
input
.
Size
())
{
sum
+=
input
.
At
(
i
);
}
...
...
@@ -332,6 +327,10 @@ Tensor Reciprocal(const Tensor& input) {
});
}
// Element-wise a / b, expressed as a * (1 / b).
Tensor ElemWiseDiv(const Tensor& a, const Tensor& b) {
  Tensor b_inverse = Reciprocal(b);
  return ElemWiseMul(a, b_inverse);
}
Tensor
MatrixRowSum
(
const
Tensor
&
input
)
{
CHECK
(
input
.
shape
().
dim_vec
().
size
()
==
2
);
std
::
shared_ptr
<
Buffer
>
out
(
new
Buffer
(
Shape
({
input
.
shape
().
At
(
0
)}),
0
));
...
...
oneflow/core/auto_placement/df_func.h
浏览文件 @
a5571351
...
...
@@ -30,13 +30,15 @@ Tensor Sub(const Tensor& a, const Tensor& b);
Tensor
ElemWiseMul
(
const
Tensor
&
a
,
const
Tensor
&
b
);
Tensor
ElemWiseDiv
(
const
Tensor
&
a
,
const
Tensor
&
b
);
Tensor
Mul
(
const
Tensor
&
a
,
const
Tensor
&
b
);
Tensor
Reciprocal
(
const
Tensor
&
input
);
Tensor
Max
(
const
Tensor
&
a
,
const
Tensor
&
b
);
Tensor
Max
(
const
Tensor
&
a
);
Tensor
Max
Elem
(
const
Tensor
&
a
);
Tensor
Relu
(
const
Tensor
&
input
);
...
...
@@ -48,8 +50,6 @@ Tensor Avg(const Tensor& a);
Tensor
Variance
(
const
Tensor
&
a
);
Tensor
MaxDeviation
(
const
Tensor
&
a
);
Tensor
AvgAbsDeviation
(
const
Tensor
&
a
);
Tensor
Square
(
const
Tensor
&
input
);
...
...
oneflow/core/auto_placement/tensor.h
浏览文件 @
a5571351
...
...
@@ -21,6 +21,10 @@ class Tensor final {
Tensor
(
const
Shape
&
shape
,
const
std
::
function
<
double
(
size_t
)
>&
Getter
)
:
buffer_
(
std
::
shared_ptr
<
Buffer
>
(
new
Buffer
(
shape
,
Getter
))),
diff_handler_
([](
const
Buffer
&
)
{})
{}
Tensor
(
const
Shape
&
shape
,
const
std
::
vector
<
double
>&
data
)
:
buffer_
(
std
::
shared_ptr
<
Buffer
>
(
new
Buffer
(
shape
,
data
))),
diff_handler_
([](
const
Buffer
&
)
{})
{}
// Tensor sharing an existing Buffer, with an explicit backward/diff handler.
// Improvement: `buffer` is a by-value sink parameter — move it into the
// member instead of copying, avoiding an extra atomic refcount bump.
Tensor(std::shared_ptr<Buffer> buffer,
       const std::function<void(const Buffer&)>& diff_handler)
    : buffer_(std::move(buffer)), diff_handler_(diff_handler) {}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录