Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
7df301f2
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7df301f2
编写于
11月 28, 2021
作者:
Z
Zhanlue Yang
提交者:
GitHub
11月 28, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added performance tests for Eager Dygraph
#1
(#37638)
上级
48faf638
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
837 addition
and
0 deletion
+837
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
...le/fluid/eager/tests/performance_tests/benchmark_utils.cc
+324
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
+95
-0
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+1
-0
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
+417
-0
未找到文件。
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>
// Eager
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/eager/utils.h"
// Eager Generated
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
// Fluid
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/pten/core/kernel_registry.h"
// Number of chained forward iterations each benchmark performs when it is run
// for timing (i.e. when accuracy_check is false). The value is only ever read
// in this file, so it is declared as a compile-time constant.
static constexpr size_t max_num_benchmark_runs = 5000;
namespace
egr
{
/* --------------------- */
/* ---- Eager Scale ---- */
/* --------------------- */
void
benchmark_eager_scale
(
const
EagerTensor
&
tensor
,
bool
accuracy_check
)
{
EagerTensor
input_tensor
=
tensor
;
float
scale
=
2.0
;
float
bias
=
3.0
;
size_t
max_num_runs
=
accuracy_check
?
10
:
max_num_benchmark_runs
;
for
(
size_t
i
=
0
;
i
<
max_num_runs
;
i
++
)
{
input_tensor
=
egr
::
scale
(
input_tensor
,
scale
,
bias
,
true
/*bias_after_scale*/
,
true
/*trace_backward*/
);
}
std
::
vector
<
EagerTensor
>
target_tensors
=
{
input_tensor
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 10)
CompareTensorWithValue
<
float
>
(
input_tensor
,
8189.0
);
// Examine Backward Grad (w.r.t max_num_runs = 10)
CompareGradTensorWithValue
<
float
>
(
tensor
,
1024.0
);
}
}
/* ----------------------------------- */
/* ---- Eager Intermediate Matmul ---- */
/* ----------------------------------- */
void
benchmark_eager_intermediate_matmul
(
const
EagerTensor
&
X
,
const
EagerTensor
&
Y
,
bool
accuracy_check
)
{
EagerTensor
input_tensor0
=
X
;
size_t
max_num_runs
=
accuracy_check
?
2
:
max_num_benchmark_runs
;
for
(
size_t
i
=
0
;
i
<
max_num_runs
;
i
++
)
{
input_tensor0
=
matmul_v2_dygraph_function
(
input_tensor0
,
Y
,
{{
"trans_x"
,
false
},
{
"trans_y"
,
false
}});
}
std
::
vector
<
EagerTensor
>
target_tensors
=
{
input_tensor0
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue
<
float
>
(
input_tensor0
,
16
);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue
<
float
>
(
X
,
16
);
CompareGradVariableWithValue
<
float
>
(
Y
,
16
);
}
}
/* -------------------------------- */
/* ---- Eager Intermediate MLP ---- */
/* -------------------------------- */
void
benchmark_eager_intermediate_mlp
(
const
EagerTensor
&
X
,
const
std
::
vector
<
EagerTensor
>&
Ws
,
const
std
::
vector
<
EagerTensor
>&
Bs
,
bool
accuracy_check
)
{
EagerTensor
input0
=
X
;
for
(
size_t
i
=
0
;
i
<
MLP_NUM_LINEAR
;
i
++
)
{
EagerTensor
Out
=
matmul_v2_dygraph_function
(
input0
,
Ws
[
i
],
{{
"trans_x"
,
false
},
{
"trans_y"
,
false
}});
input0
=
elementwise_add_dygraph_function
(
Out
,
Bs
[
i
],
{});
}
EagerTensor
Out
=
reduce_sum_dygraph_function
(
input0
,
{{
"reduce_all"
,
true
}});
std
::
vector
<
EagerTensor
>
target_tensors
=
{
Out
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
std
::
unordered_map
<
std
::
string
,
float
>
result
=
compute_mlp_expected_results
();
// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue
<
float
>
(
Out
,
result
[
"Out"
]);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue
<
float
>
(
X
,
result
[
"GradX"
]);
CompareGradVariableWithValue
<
float
>
(
Ws
[
0
],
result
[
"GradW"
]);
}
}
}
// namespace egr
namespace
paddle
{
namespace
imperative
{
// Checks that the first element of X's forward tensor equals `value`.
//
// X:     variable whose LoDTensor is inspected.
// place: place passed to mutable_data<float>(); when it equals the default
//        CUDAPlace the data is first copied into a host buffer.
// value: expected value of element 0.
//
// Fails through PADDLE_ENFORCE when the values differ. Only element 0 is
// examined, and the tensor is indexed without checking that it is non-empty.
static void FluidCheckTensorValue(const std::shared_ptr<imperative::VarBase>& X,
                                  const paddle::platform::Place& place,
                                  float value) {
  auto* tensor = X->MutableVar()->GetMutable<framework::LoDTensor>();
  float* t_ptr = tensor->mutable_data<float>(place);
  // Host staging buffer; it is only filled when the data lives on the device.
  std::vector<float> host_data(tensor->numel());

// Compile the device branch only in GPU builds, using the same guard that
// protects the CUDA tests in fwd_bwd_joint_test.cc.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (place == paddle::platform::CUDAPlace()) {
    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        dynamic_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
    auto stream = dev_ctx->stream();

    // NOTE(review): the copy is issued on the device stream; confirm that it
    // has completed before host_data is read below.
    paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(),
                         paddle::platform::CUDAPlace(), t_ptr,
                         sizeof(float) * tensor->numel(), stream);
    // From here on read the host copy instead of the device pointer.
    t_ptr = host_data.data();
  }
#endif

  VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value;
  PADDLE_ENFORCE(
      t_ptr[0] == value,
      paddle::platform::errors::Fatal(
          "Detected numerical Error, Expected %f but got %f", value, t_ptr[0]));
}
// Checks that the first element of X's gradient tensor equals `value`.
//
// X:     variable whose gradient LoDTensor (MutableGradVar) is inspected.
// place: place passed to mutable_data<float>(); when it equals the default
//        CUDAPlace the data is first copied into a host buffer.
// value: expected value of element 0.
//
// Fails through PADDLE_ENFORCE when the values differ. Only element 0 is
// examined, and the tensor is indexed without checking that it is non-empty.
static void FluidCheckGradTensorValue(
    const std::shared_ptr<imperative::VarBase>& X,
    const paddle::platform::Place& place, float value) {
  auto* grad_tensor = X->MutableGradVar()->GetMutable<framework::LoDTensor>();
  float* g_ptr = grad_tensor->mutable_data<float>(place);
  // Host staging buffer; it is only filled when the data lives on the device.
  std::vector<float> g_host_data(grad_tensor->numel());

// Compile the device branch only in GPU builds, using the same guard that
// protects the CUDA tests in fwd_bwd_joint_test.cc.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (place == paddle::platform::CUDAPlace()) {
    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        dynamic_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
    auto stream = dev_ctx->stream();

    // NOTE(review): the copy is issued on the device stream; confirm that it
    // has completed before g_host_data is read below.
    paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(),
                         paddle::platform::CUDAPlace(), g_ptr,
                         sizeof(float) * grad_tensor->numel(), stream);
    // From here on read the host copy instead of the device pointer.
    g_ptr = g_host_data.data();
  }
#endif

  VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value;
  PADDLE_ENFORCE(
      g_ptr[0] == value,
      paddle::platform::errors::Fatal(
          "Detected numerical Error, Expected %f but got %f", value, g_ptr[0]));
}
/* --------------------- */
/* ---- Fluid Scale ---- */
/* --------------------- */
// TODO(jiabin): Change this and remove nolint
// Benchmarks the "scale" op through the fluid imperative tracer.
//
// Traces "scale" (attrs: use_mkldnn = false, scale = 2, bias = 3,
// bias_after_scale = true) repeatedly, feeding each output into the next
// trace, then initialises the tracer's engine with the final output and
// executes the backward pass.
//
// X:              input of the first traced op; also the variable whose
//                 gradient is inspected when accuracy_check is set.
// place:          place handed to TraceOp and to the value checks.
// accuracy_check: when true, only 10 ops are traced (instead of
//                 max_num_benchmark_runs) and the final output / input
//                 gradient are compared against 8189 and 1024.
void benchmark_fluid_scale(const std::shared_ptr<imperative::VarBase>& X,
                           const paddle::platform::Place& place,
                           bool accuracy_check) {
  imperative::Tracer tracer;

  framework::AttributeMap attrs;
  attrs["use_mkldnn"] = false;
  attrs["scale"] = 2;
  attrs["bias"] = 3;
  attrs["bias_after_scale"] = true;

  const size_t num_iterations = accuracy_check ? 10 : max_num_benchmark_runs;

  std::shared_ptr<imperative::VarBase> current = X;
  for (size_t step = 0; step != num_iterations; ++step) {
    imperative::NameVarBaseMap ins = {{"X", {current}}};
    // Every traced op writes into a freshly created output variable.
    std::shared_ptr<imperative::VarBase> fresh_out(
        new imperative::VarBase(true, "Out"));
    imperative::NameVarBaseMap outs = {{"Out", {fresh_out}}};

    tracer.TraceOp("scale", ins, outs, attrs, place, true);

    current = outs["Out"][0];
  }

  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init({current}, grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (!accuracy_check) {
    return;
  }

  FluidCheckTensorValue(current, place, 8189.0);
  FluidCheckGradTensorValue(X, place, 1024.0);
}
/* ---------------------- */
/* ---- Fluid Matmul ---- */
/* ---------------------- */
// Benchmarks the "matmul_v2" op through the fluid imperative tracer.
//
// Traces matmul_v2(current, Y) repeatedly with an empty attribute map,
// feeding each output into the next trace as "X", then initialises the
// tracer's engine with the final output and executes the backward pass.
//
// X, Y:           operands; X seeds the chain, Y is the "Y" input of every op.
// place:          place handed to TraceOp and to the value checks.
// accuracy_check: when true, only 2 ops are traced (instead of
//                 max_num_benchmark_runs) and the final output plus the
//                 gradients of X and Y are each compared against 16.
void benchmark_fluid_matmul(const std::shared_ptr<imperative::VarBase>& X,
                            const std::shared_ptr<imperative::VarBase>& Y,
                            const paddle::platform::Place& place,
                            bool accuracy_check) {
  imperative::Tracer tracer;

  const size_t num_iterations = accuracy_check ? 2 : max_num_benchmark_runs;

  std::shared_ptr<imperative::VarBase> current = X;
  for (size_t step = 0; step != num_iterations; ++step) {
    framework::AttributeMap attrs;
    imperative::NameVarBaseMap ins = {{"X", {current}}, {"Y", {Y}}};
    // Every traced op writes into a freshly created output variable.
    std::shared_ptr<imperative::VarBase> fresh_out(
        new imperative::VarBase(true, "Out"));
    imperative::NameVarBaseMap outs = {{"Out", {fresh_out}}};

    tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);

    current = outs["Out"][0];
  }

  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init({current}, grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (!accuracy_check) {
    return;
  }

  FluidCheckTensorValue(current, place, 16);
  FluidCheckGradTensorValue(X, place, 16);
  FluidCheckGradTensorValue(Y, place, 16);
}
/* ------------------- */
/* ---- Fluid MLP ---- */
/* ------------------- */
// Benchmarks an MLP through the fluid imperative tracer.
//
// For each of the MLP_NUM_LINEAR layers traces
//   matmul_v2(input, Ws[i]) followed by elementwise_add(result, Bs[i]),
// then traces reduce_sum(reduce_all = true) on the final activation,
// initialises the tracer's engine with the reduced output and executes the
// backward pass.
//
// X:              input of the first layer.
// Ws, Bs:         per-layer weights and biases; both are indexed from 0 to
//                 MLP_NUM_LINEAR - 1 without bounds checking.
// place:          place handed to TraceOp and to the value checks.
// accuracy_check: when true, the reduced output, the gradient of X and the
//                 gradient of Ws[0] are compared against
//                 egr::compute_mlp_expected_results().
void benchmark_fluid_mlp(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
    const paddle::platform::Place& place, bool accuracy_check) {
  imperative::Tracer tracer;

  imperative::NameVarBaseMap ins;
  imperative::NameVarBaseMap outs;
  // Left empty for the matmul / add ops; filled in for reduce_sum below.
  framework::AttributeMap attrs;
  std::shared_ptr<imperative::VarBase> input0 = X;
  for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
    // Matmul for layer i. Each layer uses its own weight Ws[i], mirroring
    // benchmark_eager_intermediate_mlp, so that Ws[0] only receives the
    // first layer's gradient (which is what the "GradW" check expects).
    ins = {{"X", {input0}}, {"Y", {Ws[i]}}};
    outs = {{"Out",
             {std::shared_ptr<imperative::VarBase>(
                 new imperative::VarBase(true, "Out"))}}};

    tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);

    // Bias add for layer i; consumes the matmul output.
    ins = {{"X", outs["Out"]}, {"Y", {Bs[i]}}};
    outs = {{"Out",
             {std::shared_ptr<imperative::VarBase>(
                 new imperative::VarBase(true, "Out"))}}};

    tracer.TraceOp("elementwise_add", ins, outs, attrs, place, true);
    input0 = outs["Out"][0];
  }

  // ReduceSum over every element of the last activation.
  ins = {{"X", {input0}}};
  outs = {{"Out",
           {std::shared_ptr<imperative::VarBase>(
               new imperative::VarBase(true, "Out"))}}};
  attrs = {{"reduce_all", true}};

  tracer.TraceOp("reduce_sum", ins, outs, attrs, place, true);

  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (accuracy_check) {
    std::unordered_map<std::string, float> result =
        egr::compute_mlp_expected_results();

    FluidCheckTensorValue(outs["Out"][0], place, result["Out"]);
    FluidCheckGradTensorValue(X, place, result["GradX"]);
    FluidCheckGradTensorValue(Ws[0], place, result["GradW"]);
  }
}
}
// namespace imperative
}
// namespace paddle
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/pten/api/all.h"
/* MLP Configurations */
// Out1 = X[M, N] x W[N, K] + B[K]
// ... x MLP_NUM_LINEAR
// Out = ReduceSum(OutN)
#define MLP_M 4
#define MLP_N 16
#define MLP_K MLP_N
#define MLP_X_VAL 1.0
#define MLP_W_VAL 2.0
#define MLP_B_VAL 3.0
#define MLP_NUM_LINEAR 1000
namespace
egr
{
// Computes the reference values used by the MLP accuracy checks from the
// MLP_* configuration macros.
//
// Returns a map with three entries:
//   "Out"   - MLP_X_VAL pushed through MLP_NUM_LINEAR steps of
//             v -> v * MLP_W_VAL * MLP_N + MLP_B_VAL, then multiplied by
//             MLP_M * MLP_N.
//   "GradX" - (MLP_W_VAL * MLP_N) ^ MLP_NUM_LINEAR.
//   "GradW" - (MLP_W_VAL * MLP_N) ^ (MLP_NUM_LINEAR - 1) * MLP_X_VAL * MLP_M.
//
// NOTE(review): with MLP_W_VAL * MLP_N = 32 and MLP_NUM_LINEAR = 1000 these
// magnitudes are far beyond the largest finite float, so the accuracy checks
// are presumably comparing infinities - confirm this is intended.
inline std::unordered_map<std::string, float> compute_mlp_expected_results() {
  // Forward value of a single output element after all linear layers.
  float forward_value = MLP_X_VAL;
  size_t layer = 0;
  while (layer < MLP_NUM_LINEAR) {
    forward_value = forward_value * MLP_W_VAL * MLP_N + MLP_B_VAL;
    ++layer;
  }
  // Scale by the MLP_M * MLP_N factor applied to the final value.
  forward_value = forward_value * MLP_M * MLP_N;

  float grad_input = 1.0 * pow((MLP_W_VAL * MLP_N), MLP_NUM_LINEAR);
  float grad_first_weight =
      1.0 * pow((MLP_W_VAL * MLP_N), (MLP_NUM_LINEAR - 1)) * MLP_X_VAL * MLP_M;

  return {{"Out", forward_value},
          {"GradX", grad_input},
          {"GradW", grad_first_weight}};
}
/* ---- Eager Scale ---- */
// Chains egr::scale calls starting from `tensor` and runs backward on the
// result. With accuracy_check set, a short chain is run and the output and
// input gradient are compared against fixed expected values.
void benchmark_eager_scale(const EagerTensor& tensor,
                           bool accuracy_check = false);
/* ---- Eager MatMul ---- */
/*
void benchmark_eager_matmul(const EagerTensor& X, const EagerTensor& Y,
bool accuracy_check = false);
void benchmark_eager_mlp(const EagerTensor& X,
const std::vector<EagerTensor>& Ws,
const std::vector<EagerTensor>& Bs,
bool accuracy_check = false);
*/
// Chains generated eager matmul_v2 calls (X * Y * Y * ...) and runs backward
// on the result. With accuracy_check set, a short chain is run and the output
// and the gradients of X and Y are compared against fixed expected values.
void benchmark_eager_intermediate_matmul(const EagerTensor& X,
                                         const EagerTensor& Y,
                                         bool accuracy_check = false);
// Runs an MLP of MLP_NUM_LINEAR (matmul_v2 + elementwise_add) layers followed
// by a full reduce_sum using generated eager functions, then runs backward.
// Ws and Bs hold the per-layer weights and biases. With accuracy_check set,
// results are compared against compute_mlp_expected_results().
void benchmark_eager_intermediate_mlp(const EagerTensor& X,
                                      const std::vector<EagerTensor>& Ws,
                                      const std::vector<EagerTensor>& Bs,
                                      bool accuracy_check = false);
}
// namespace egr
namespace
paddle
{
namespace
imperative
{
/* ---- Fluid Scale ---- */
// TODO(jiabin): Change this and remove nolint
// Fluid (imperative tracer) counterpart of benchmark_eager_scale: traces a
// chain of "scale" ops starting from X on `place` and executes backward.
// With accuracy_check set, a short chain is run and checked against fixed
// expected values.
void benchmark_fluid_scale(
    const std::shared_ptr<imperative::VarBase>& X,  // NOLINT
    const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MatMul ---- */
// Fluid (imperative tracer) counterpart of
// benchmark_eager_intermediate_matmul: traces a chain of "matmul_v2" ops
// (X * Y * Y * ...) on `place` and executes backward. With accuracy_check
// set, a short chain is run and checked against fixed expected values.
void benchmark_fluid_matmul(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::shared_ptr<imperative::VarBase>& Y,  // NOLINT
    const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MLP ---- */
// Fluid (imperative tracer) counterpart of benchmark_eager_intermediate_mlp:
// traces MLP_NUM_LINEAR (matmul_v2 + elementwise_add) layers followed by a
// full reduce_sum on `place` and executes backward. Ws and Bs hold the weights
// and biases. With accuracy_check set, results are compared against
// egr::compute_mlp_expected_results().
void benchmark_fluid_mlp(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
    const paddle::platform::Place& place, bool accuracy_check = false);
}
// namespace imperative
}
// namespace paddle
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
浏览文件 @
7df301f2
...
...
@@ -4,3 +4,4 @@ cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eage
# Eager task tests. Every target links ${eager_deps}, ${fluid_deps},
# eager_scale and scale_node.
cc_test(test_egr_task_backward
  SRCS backward_test.cc
  DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_hook
  SRCS hook_test.cc
  DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_cross_batch
  SRCS cross_batch_accumulation_test.cc
  DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_fwd_bwd_joint
  SRCS fwd_bwd_joint_test.cc
  DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/tensor_meta.h"
#include "paddle/fluid/eager/tests/test_utils.h"
// TODO(jiabin): remove nolint here!!!
using
namespace
egr
;
// NOLINT
namespace
eager_test
{
// Gradient hook used by the tests in this file.
//
// Allocates a new dense tensor with the same dtype, dims, layout and place as
// `t` and fills it with the elements of `t` plus 5. Both buffers are accessed
// as float.
//
// t: tensor whose impl must be a pten::DenseTensor; enforced below.
// Returns a new EagerTensor wrapping the freshly allocated dense tensor.
egr::EagerTensor hook_function(const egr::EagerTensor& t) {
  auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());
  // dynamic_pointer_cast yields nullptr when the impl is not a DenseTensor;
  // fail explicitly instead of dereferencing a null pointer below.
  PADDLE_ENFORCE(
      t_dense != nullptr,
      paddle::platform::errors::Fatal(
          "hook_function expects a tensor backed by pten::DenseTensor"));

  auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
                                        t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size =
      paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<pten::DenseTensor>(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes_size), 0),
      std::move(ret_meta));

  float* t_ptr = t_dense->mutable_data<float>();
  float* ret_ptr = ret_dense->mutable_data<float>();
  // Use numel()'s own type for the counter so the comparison never mixes
  // integer widths, and query the element count only once.
  auto num_elements = ret_dense->numel();
  for (decltype(num_elements) i = 0; i < num_elements; i++) {
    ret_ptr[i] = t_ptr[i] + 5.0;
  }

  auto ret_impl = std::dynamic_pointer_cast<pten::TensorBase>(ret_dense);
  egr::EagerTensor ret = egr::EagerTensor();
  ret.set_impl(ret_impl);

  return ret;
}
// One scale node on CPU: out = 2 * x + 3 with x filled with 5.0.
// Expects out == 13 and d(out)/d(x) == 2.
TEST(FwdBwdJoint, SingleNode) {
  const auto cpu_place = paddle::platform::CPUPlace();
  InitEnv(cpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, cpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // Forward.
  const float scale_factor = 2.0;
  const float bias_term = 3.0;
  egr::EagerTensor result =
      egr::scale(input, scale_factor, bias_term, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // 5 * 2 + 3 = 13.
  CompareTensorWithValue<float>(result, 13.0);

  // Backward.
  std::vector<egr::EagerTensor> roots{result};
  RunBackward(roots, {});

  VLOG(7) << "Target Grad is: "
          << std::static_pointer_cast<pten::DenseTensor>(
                 EagerUtils::unsafe_autograd_meta(input)->Grad().impl())
                 ->data<float>()[0];

  // The gradient of the input equals the scale factor.
  CompareGradTensorWithValue<float>(input, 2.0);
}
/*
inp
|
Node0
|
Node1
|
out
*/
// Two scale nodes in sequence on CPU:
//   out0 = 2 * x + 3, out1 = 5 * out0 + 10, with x filled with 5.0.
// Expects out0 == 13, out1 == 75 and d(out1)/d(x) == 10.
TEST(FwdBwdJoint, LinearNodes) {
  const auto cpu_place = paddle::platform::CPUPlace();
  InitEnv(cpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, cpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // Forward, node 0.
  const float first_scale = 2.0;
  const float first_bias = 3.0;
  egr::EagerTensor first_out =
      egr::scale(input, first_scale, first_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward, node 1.
  const float second_scale = 5.0;
  const float second_bias = 10.0;
  egr::EagerTensor second_out =
      egr::scale(first_out, second_scale, second_bias,
                 true /*bias_after_scale*/, true /*trace_backward*/);

  // 5 * 2 + 3 = 13 and 13 * 5 + 10 = 75.
  CompareTensorWithValue<float>(first_out, 13.0);
  CompareTensorWithValue<float>(second_out, 75.0);

  // Backward from the last node.
  std::vector<egr::EagerTensor> roots{second_out};
  RunBackward(roots, {});

  // Product of both scale factors: 2 * 5.
  CompareGradTensorWithValue<float>(input, 10.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// One scale node feeding two independent scale nodes on CPU:
//   out0 = 2 * x + 3, out1 = 5 * out0 + 10, out2 = 10 * out0 + 20,
// with x filled with 5.0. Expects out0 == 13, out1 == 75, out2 == 150 and,
// after running backward from both outputs, an input gradient of 30.
TEST(FwdBwdJoint, BranchedNodes) {
  InitEnv(paddle::platform::CPUPlace());

  // 1. Prepare Input
  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor tensor = CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(tensor);

  // 2. Run Forward
  // Run Forward Node 0
  float scale0 = 2.0;
  float bias0 = 3.0;
  egr::EagerTensor out0 =
      egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Run Forward Node 1
  float scale1 = 5.0;
  float bias1 = 10.0;
  egr::EagerTensor out1 = egr::scale(
      out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);

  // Run Forward Node 2
  float scale2 = 10.0;
  float bias2 = 20.0;
  egr::EagerTensor out2 = egr::scale(
      out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);

  // Examine Forward Output 0
  CompareTensorWithValue<float>(out0, 13.0);

  // Examine Forward Output 1
  CompareTensorWithValue<float>(out1, 75.0);

  // Examine Forward Output 2
  {
    auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out2.impl());
    // The cast yields nullptr when the impl is not a DenseTensor; fail
    // explicitly rather than dereferencing it.
    PADDLE_ENFORCE(dense_out != nullptr,
                   paddle::platform::errors::Fatal(
                       "Expected out2 to be backed by pten::DenseTensor"));
    float* ptr = dense_out->mutable_data<float>();
    // Verify every element instead of only a fixed-size prefix.
    auto num_elements = dense_out->numel();
    for (decltype(num_elements) i = 0; i < num_elements; i++) {
      PADDLE_ENFORCE(ptr[i] == 150.0,
                     paddle::platform::errors::Fatal(
                         "Detected numerical Error, Expected %f but got %f",
                         150.0, ptr[i]));
    }
  }

  // 3. Run Backward
  std::vector<egr::EagerTensor> outs = {out1, out2};
  RunBackward(outs, {});

  // Examine Backward Grad: (5 + 10) * 2 = 30.
  CompareGradTensorWithValue<float>(tensor, 30.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// Branched graph (out0 feeds out1 and out2) on CPU where out0, out1 and out2
// each retain their gradient and then get hook_function (+5) registered as a
// gradient hook, in that order.
// Expected gradients: out1 == 1, out2 == 1, out0 == 90, input == 190. These
// are consistent with each hook adding 5 to the gradient that flows on past
// the tensor after the retained value was recorded:
//   out0: (1 + 5) * 5 + (1 + 5) * 10 = 90, input: (90 + 5) * 2 = 190.
TEST(FwdBwdJoint, GradientHook) {
  const auto cpu_place = paddle::platform::CPUPlace();
  InitEnv(cpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, cpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // The same hook object is registered on all three intermediate tensors.
  std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
      &hook_function;

  // Forward, node 0: retain the gradient first, then attach the hook.
  const float first_scale = 2.0;
  const float first_bias = 3.0;
  egr::EagerTensor out0 =
      egr::scale(input, first_scale, first_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);
  RetainGradForTensor(out0);
  RegisterGradientHookForTensor(out0, hook);

  // Forward, node 1.
  const float second_scale = 5.0;
  const float second_bias = 10.0;
  egr::EagerTensor out1 =
      egr::scale(out0, second_scale, second_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);
  RetainGradForTensor(out1);
  RegisterGradientHookForTensor(out1, hook);

  // Forward, node 2.
  const float third_scale = 10.0;
  const float third_bias = 20.0;
  egr::EagerTensor out2 =
      egr::scale(out0, third_scale, third_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);
  RetainGradForTensor(out2);
  RegisterGradientHookForTensor(out2, hook);

  // Backward from both branch outputs.
  std::vector<egr::EagerTensor> roots{out1, out2};
  RunBackward(roots, {});

  // Leaf gradient.
  CompareGradTensorWithValue<float>(input, 190.0);
  // Retained gradient of out0.
  CompareGradTensorWithValue<float>(out0, 90.0);
  // Retained gradients of the two outputs.
  CompareGradTensorWithValue<float>(out1, 1.0);
  CompareGradTensorWithValue<float>(out2, 1.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// Branched graph (out0 feeds out1 and out2) on CPU, with backward run twice
// from the same outputs. Expects the input gradient to be 30 after the first
// run and 60 after the second, i.e. the second run adds to the first.
TEST(FwdBwdJoint, CrossBatchAccumulation) {
  const auto cpu_place = paddle::platform::CPUPlace();
  InitEnv(cpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, cpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // Forward, node 0.
  const float first_scale = 2.0;
  const float first_bias = 3.0;
  egr::EagerTensor out0 =
      egr::scale(input, first_scale, first_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward, node 1.
  const float second_scale = 5.0;
  const float second_bias = 10.0;
  egr::EagerTensor out1 =
      egr::scale(out0, second_scale, second_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward, node 2.
  const float third_scale = 10.0;
  const float third_bias = 20.0;
  egr::EagerTensor out2 =
      egr::scale(out0, third_scale, third_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // First backward pass: (5 + 10) * 2 = 30.
  std::vector<egr::EagerTensor> roots{out1, out2};
  RunBackward(roots, {});
  CompareGradTensorWithValue<float>(input, 30.0);

  // Second backward pass over the same graph doubles the gradient.
  RunBackward(roots, {});
  CompareGradTensorWithValue<float>(input, 60.0);
}
/* ---------------------------------------------------- */
/* ---------------------- CUDA Tests ------------------ */
/* ---------------------------------------------------- */
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// CUDAPlace variant of SingleNode: out = 2 * x + 3 with x filled with 5.0.
// Expects out == 13 and d(out)/d(x) == 2.
TEST(FwdBwdJoint, SingleNodeCUDA) {
  const auto gpu_place = paddle::platform::CUDAPlace();
  InitEnv(gpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, gpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // Forward.
  const float scale_factor = 2.0;
  const float bias_term = 3.0;
  egr::EagerTensor result =
      egr::scale(input, scale_factor, bias_term, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // 5 * 2 + 3 = 13.
  CompareTensorWithValue<float>(result, 13.0);

  // Backward.
  std::vector<egr::EagerTensor> roots{result};
  RunBackward(roots, {});

  // The gradient of the input equals the scale factor.
  CompareGradTensorWithValue<float>(input, 2.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// CUDAPlace variant of BranchedNodes:
//   out0 = 2 * x + 3, out1 = 5 * out0 + 10, out2 = 10 * out0 + 20,
// with x filled with 5.0. Expects out0 == 13, out1 == 75, out2 == 150 and,
// after running backward from both outputs, an input gradient of 30.
TEST(FwdBwdJoint, BranchedNodesCUDA) {
  const auto gpu_place = paddle::platform::CUDAPlace();
  InitEnv(gpu_place);

  // Input: a leaf tensor filled with 5.0 whose gradient is retained.
  const auto input_dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor input = CreateTensorWithValue(
      input_dims, gpu_place, pten::DataType::FLOAT32, pten::DataLayout::NCHW,
      5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(input);

  // Forward, node 0.
  const float first_scale = 2.0;
  const float first_bias = 3.0;
  egr::EagerTensor out0 =
      egr::scale(input, first_scale, first_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward, node 1.
  const float second_scale = 5.0;
  const float second_bias = 10.0;
  egr::EagerTensor out1 =
      egr::scale(out0, second_scale, second_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward, node 2.
  const float third_scale = 10.0;
  const float third_bias = 20.0;
  egr::EagerTensor out2 =
      egr::scale(out0, third_scale, third_bias, true /*bias_after_scale*/,
                 true /*trace_backward*/);

  // Forward outputs: 13, 13 * 5 + 10 = 75 and 13 * 10 + 20 = 150.
  CompareTensorWithValue<float>(out0, 13.0);
  CompareTensorWithValue<float>(out1, 75.0);
  CompareTensorWithValue<float>(out2, 150.0);

  // TODO(jiabin): fix this with add functor
  // Backward from both branch outputs.
  std::vector<egr::EagerTensor> roots{out1, out2};
  RunBackward(roots, {});

  // (5 + 10) * 2 = 30.
  CompareGradTensorWithValue<float>(input, 30.0);
}
#endif
}
// namespace eager_test
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录