Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
7df301f2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2310
Star
20933
Fork
5423
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7df301f2
编写于
11月 28, 2021
作者:
Z
Zhanlue Yang
提交者:
GitHub
11月 28, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added performance tests for Eager Dygraph
#1
(#37638)
上级
48faf638
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
837 addition
and
0 deletion
+837
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
...le/fluid/eager/tests/performance_tests/benchmark_utils.cc
+324
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
+95
-0
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+1
-0
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
+417
-0
未找到文件。
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>
// Eager
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/eager/utils.h"
// Eager Generated
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
// Fluid
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/pten/core/kernel_registry.h"
// Number of iterations each benchmark runs when accuracy checking is
// disabled; accuracy-check mode uses a small per-benchmark count instead.
static size_t max_num_benchmark_runs = 5000;
namespace
egr
{
/* --------------------- */
/* ---- Eager Scale ---- */
/* --------------------- */
void
benchmark_eager_scale
(
const
EagerTensor
&
tensor
,
bool
accuracy_check
)
{
EagerTensor
input_tensor
=
tensor
;
float
scale
=
2.0
;
float
bias
=
3.0
;
size_t
max_num_runs
=
accuracy_check
?
10
:
max_num_benchmark_runs
;
for
(
size_t
i
=
0
;
i
<
max_num_runs
;
i
++
)
{
input_tensor
=
egr
::
scale
(
input_tensor
,
scale
,
bias
,
true
/*bias_after_scale*/
,
true
/*trace_backward*/
);
}
std
::
vector
<
EagerTensor
>
target_tensors
=
{
input_tensor
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 10)
CompareTensorWithValue
<
float
>
(
input_tensor
,
8189.0
);
// Examine Backward Grad (w.r.t max_num_runs = 10)
CompareGradTensorWithValue
<
float
>
(
tensor
,
1024.0
);
}
}
/* ----------------------------------- */
/* ---- Eager Intermediate Matmul ---- */
/* ----------------------------------- */
void
benchmark_eager_intermediate_matmul
(
const
EagerTensor
&
X
,
const
EagerTensor
&
Y
,
bool
accuracy_check
)
{
EagerTensor
input_tensor0
=
X
;
size_t
max_num_runs
=
accuracy_check
?
2
:
max_num_benchmark_runs
;
for
(
size_t
i
=
0
;
i
<
max_num_runs
;
i
++
)
{
input_tensor0
=
matmul_v2_dygraph_function
(
input_tensor0
,
Y
,
{{
"trans_x"
,
false
},
{
"trans_y"
,
false
}});
}
std
::
vector
<
EagerTensor
>
target_tensors
=
{
input_tensor0
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue
<
float
>
(
input_tensor0
,
16
);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue
<
float
>
(
X
,
16
);
CompareGradVariableWithValue
<
float
>
(
Y
,
16
);
}
}
/* -------------------------------- */
/* ---- Eager Intermediate MLP ---- */
/* -------------------------------- */
void
benchmark_eager_intermediate_mlp
(
const
EagerTensor
&
X
,
const
std
::
vector
<
EagerTensor
>&
Ws
,
const
std
::
vector
<
EagerTensor
>&
Bs
,
bool
accuracy_check
)
{
EagerTensor
input0
=
X
;
for
(
size_t
i
=
0
;
i
<
MLP_NUM_LINEAR
;
i
++
)
{
EagerTensor
Out
=
matmul_v2_dygraph_function
(
input0
,
Ws
[
i
],
{{
"trans_x"
,
false
},
{
"trans_y"
,
false
}});
input0
=
elementwise_add_dygraph_function
(
Out
,
Bs
[
i
],
{});
}
EagerTensor
Out
=
reduce_sum_dygraph_function
(
input0
,
{{
"reduce_all"
,
true
}});
std
::
vector
<
EagerTensor
>
target_tensors
=
{
Out
};
RunBackward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
std
::
unordered_map
<
std
::
string
,
float
>
result
=
compute_mlp_expected_results
();
// Examine Forward Grad (w.r.t max_num_runs = 2)
CompareVariableWithValue
<
float
>
(
Out
,
result
[
"Out"
]);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue
<
float
>
(
X
,
result
[
"GradX"
]);
CompareGradVariableWithValue
<
float
>
(
Ws
[
0
],
result
[
"GradW"
]);
}
}
}
// namespace egr
namespace
paddle
{
namespace
imperative
{
// Asserts that the first element of X's forward tensor equals `value`.
// For CUDA places the whole tensor is copied back to host memory first.
// NOTE(review): only element [0] is inspected; callers rely on the
// benchmarks producing uniform-valued tensors — confirm if reused elsewhere.
static void FluidCheckTensorValue(const std::shared_ptr<imperative::VarBase>& X,
                                  const paddle::platform::Place& place,
                                  float value) {
  auto* tensor = X->MutableVar()->GetMutable<framework::LoDTensor>();
  // mutable_data is used for read access here; presumably the tensor is
  // already allocated on `place` by the traced ops — TODO confirm.
  float* t_ptr = tensor->mutable_data<float>(place);
  std::vector<float> host_data(tensor->numel());
  // NOTE(review): compares against a default-constructed CUDAPlace, i.e.
  // device 0 — verify for multi-device runs.
  if (place == paddle::platform::CUDAPlace()) {
    // Copy device data to host over the device's stream before inspecting.
    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        dynamic_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
    auto stream = dev_ctx->stream();
    paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(),
                         paddle::platform::CUDAPlace(), t_ptr,
                         sizeof(float) * tensor->numel(), stream);
    t_ptr = host_data.data();
  }
  VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value;
  // Exact float equality is intentional: the benchmark arithmetic is exact
  // for these small integer-valued operands.
  PADDLE_ENFORCE(t_ptr[0] == value,
                 paddle::platform::errors::Fatal(
                     "Detected numerical Error, Expected %f but got %f", value,
                     t_ptr[0]));
}
// Asserts that the first element of X's gradient tensor equals `value`.
// Mirrors FluidCheckTensorValue but reads MutableGradVar() instead of the
// forward variable. For CUDA places, data is copied back to host first.
static void FluidCheckGradTensorValue(
    const std::shared_ptr<imperative::VarBase>& X,
    const paddle::platform::Place& place, float value) {
  auto* grad_tensor = X->MutableGradVar()->GetMutable<framework::LoDTensor>();
  // mutable_data is used for read access; the grad tensor is presumably
  // already materialized by engine->Execute() — TODO confirm.
  float* g_ptr = grad_tensor->mutable_data<float>(place);
  std::vector<float> g_host_data(grad_tensor->numel());
  // NOTE(review): compares against default-constructed CUDAPlace (device 0).
  if (place == paddle::platform::CUDAPlace()) {
    // Copy device gradients to host over the device's stream.
    paddle::platform::DeviceContextPool& pool =
        paddle::platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        dynamic_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
    auto stream = dev_ctx->stream();
    paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(),
                         paddle::platform::CUDAPlace(), g_ptr,
                         sizeof(float) * grad_tensor->numel(), stream);
    g_ptr = g_host_data.data();
  }
  VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value;
  // Only element [0] is checked; the benchmarks produce uniform gradients.
  PADDLE_ENFORCE(g_ptr[0] == value,
                 paddle::platform::errors::Fatal(
                     "Detected numerical Error, Expected %f but got %f", value,
                     g_ptr[0]));
}
/* --------------------- */
/* ---- Fluid Scale ---- */
/* --------------------- */
// TODO(jiabin): Change this and remove nolint
// Benchmarks the legacy fluid "scale" op (x -> 2x + 3) traced through
// imperative::Tracer, followed by a backward pass via the tracer's engine.
//
// @param X               leaf input variable.
// @param place           device the ops are traced on.
// @param accuracy_check  when true, runs 10 iterations and validates the
//                        forward output (8189) and X's gradient (1024).
void benchmark_fluid_scale(const std::shared_ptr<imperative::VarBase>& X,
                           const paddle::platform::Place& place,
                           bool accuracy_check) {
  imperative::Tracer tracer;

  framework::AttributeMap attrs;
  attrs["use_mkldnn"] = false;
  attrs["scale"] = 2;
  attrs["bias"] = 3;
  attrs["bias_after_scale"] = true;

  std::shared_ptr<imperative::VarBase> cur = X;
  const size_t num_iters = accuracy_check ? 10 : max_num_benchmark_runs;
  for (size_t step = 0; step < num_iters; ++step) {
    // A fresh output VarBase per iteration; the previous output becomes
    // the next input, building one long chain for backward.
    auto fresh_out = std::shared_ptr<imperative::VarBase>(
        new imperative::VarBase(true, "Out"));
    imperative::NameVarBaseMap ins{{"X", {cur}}};
    imperative::NameVarBaseMap outs{{"Out", {fresh_out}}};
    tracer.TraceOp("scale", ins, outs, attrs, place, true);
    cur = outs["Out"][0];
  }

  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init({cur}, grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (accuracy_check) {
    FluidCheckTensorValue(cur, place, 8189.0);
    FluidCheckGradTensorValue(X, place, 1024.0);
  }
}
/* ---------------------- */
/* ---- Fluid Matmul ---- */
/* ---------------------- */
// Benchmarks a chain of fluid matmul_v2 ops traced through
// imperative::Tracer, followed by a backward pass via the tracer's engine.
//
// @param X               chain input (leaf).
// @param Y               fixed right-hand operand reused every iteration.
// @param place           device the ops are traced on.
// @param accuracy_check  when true, runs 2 iterations and validates the
//                        forward output and the gradients of X and Y (all 16).
void benchmark_fluid_matmul(const std::shared_ptr<imperative::VarBase>& X,
                            const std::shared_ptr<imperative::VarBase>& Y,
                            const paddle::platform::Place& place,
                            bool accuracy_check) {
  imperative::Tracer tracer;

  std::shared_ptr<imperative::VarBase> cur = X;
  const size_t num_iters = accuracy_check ? 2 : max_num_benchmark_runs;
  for (size_t step = 0; step < num_iters; ++step) {
    // matmul_v2 runs with all-default attributes.
    framework::AttributeMap attrs;
    auto fresh_out = std::shared_ptr<imperative::VarBase>(
        new imperative::VarBase(true, "Out"));
    imperative::NameVarBaseMap ins{{"X", {cur}}, {"Y", {Y}}};
    imperative::NameVarBaseMap outs{{"Out", {fresh_out}}};
    tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);
    cur = outs["Out"][0];
  }

  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init({cur}, grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (accuracy_check) {
    // Expected values assume the 2-iteration accuracy configuration.
    FluidCheckTensorValue(cur, place, 16);
    FluidCheckGradTensorValue(X, place, 16);
    FluidCheckGradTensorValue(Y, place, 16);
  }
}
/* ------------------- */
/* ---- Fluid MLP ---- */
/* ------------------- */
// Benchmarks a fluid MLP: MLP_NUM_LINEAR x (matmul_v2 + elementwise_add),
// a full reduce_sum to a scalar loss, then a backward pass via the tracer's
// engine. Mirrors benchmark_eager_intermediate_mlp.
//
// @param X               input activations (leaf).
// @param Ws              per-layer weight variables; Ws[i] feeds layer i.
// @param Bs              per-layer bias variables; Bs[i] feeds layer i.
// @param place           device the ops are traced on.
// @param accuracy_check  when true, validates the loss and the gradients of
//                        X and Ws[0] against compute_mlp_expected_results().
void benchmark_fluid_mlp(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
    const paddle::platform::Place& place, bool accuracy_check) {
  imperative::Tracer tracer;
  imperative::NameVarBaseMap ins;
  imperative::NameVarBaseMap outs;
  framework::AttributeMap attrs;

  std::shared_ptr<imperative::VarBase> input0 = X;
  for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
    // Matmul for layer i.
    // FIX: previously this used Ws[0] for every layer, which disagrees with
    // the eager counterpart (benchmark_eager_intermediate_mlp uses Ws[i])
    // and would make Ws[0]'s gradient accumulate across all layers,
    // mismatching the per-layer GradW from compute_mlp_expected_results().
    ins = {{"X", {input0}}, {"Y", {Ws[i]}}};
    outs = {{"Out",
             {std::shared_ptr<imperative::VarBase>(
                 new imperative::VarBase(true, "Out"))}}};
    tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);

    // Elementwise bias add for layer i.
    ins = {{"X", outs["Out"]}, {"Y", {Bs[i]}}};
    outs = {{"Out",
             {std::shared_ptr<imperative::VarBase>(
                 new imperative::VarBase(true, "Out"))}}};
    tracer.TraceOp("elementwise_add", ins, outs, attrs, place, true);

    input0 = outs["Out"][0];
  }

  // Collapse all elements into a scalar loss.
  ins = {{"X", {input0}}};
  outs = {{"Out",
           {std::shared_ptr<imperative::VarBase>(
               new imperative::VarBase(true, "Out"))}}};
  attrs = {{"reduce_all", true}};
  tracer.TraceOp("reduce_sum", ins, outs, attrs, place, true);

  // Backward over the whole traced graph.
  auto* engine = tracer.GetEngine();
  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
  engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/);
  engine->Execute();

  if (accuracy_check) {
    std::unordered_map<std::string, float> result =
        egr::compute_mlp_expected_results();
    FluidCheckTensorValue(outs["Out"][0], place, result["Out"]);
    FluidCheckGradTensorValue(X, place, result["GradX"]);
    FluidCheckGradTensorValue(Ws[0], place, result["GradW"]);
  }
}
}
// namespace imperative
}
// namespace paddle
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/pten/api/all.h"
/* MLP Configurations */
// Out1 = X[M, N] x W[N, K] + B[K]
// ... x MLP_NUM_LINEAR
// Out = ReduceSum(OutN)
#define MLP_M 4            // rows of the input X
#define MLP_N 16           // columns of X / rows of each W
#define MLP_K MLP_N        // square weights: output width equals N
#define MLP_X_VAL 1.0      // every element of X
#define MLP_W_VAL 2.0      // every element of each W
#define MLP_B_VAL 3.0      // every element of each B
#define MLP_NUM_LINEAR 1000  // number of stacked linear layers
namespace
egr
{
// Closed-form reference values for the MLP benchmarks, derived from the
// MLP_* configuration macros above.
//
// @return map with keys "Out" (reduce_sum of the final layer output),
//         "GradX" (gradient w.r.t. X) and "GradW" (gradient w.r.t. Ws[0]).
inline std::unordered_map<std::string, float> compute_mlp_expected_results() {
  // Each linear layer maps the (uniform) activation v -> v * W_VAL * N + B_VAL.
  float out_val = MLP_X_VAL;
  size_t layer = 0;
  while (layer < MLP_NUM_LINEAR) {
    out_val = out_val * MLP_W_VAL * MLP_N + MLP_B_VAL;
    ++layer;
  }
  // reduce_sum over the M x N output.
  out_val = out_val * MLP_M * MLP_N;

  const float grad_x = 1.0 * pow((MLP_W_VAL * MLP_N), MLP_NUM_LINEAR);
  const float grad_w0 =
      1.0 * pow((MLP_W_VAL * MLP_N), (MLP_NUM_LINEAR - 1)) * MLP_X_VAL * MLP_M;

  // NOTE(review): with MLP_NUM_LINEAR = 1000 these values overflow float to
  // +inf; the benchmarks' equality checks then compare inf == inf — confirm
  // this is the intended behavior.
  return {{"Out", out_val}, {"GradX", grad_x}, {"GradW", grad_w0}};
}
/* ---- Eager Scale ---- */
// Repeatedly applies the eager scale op to `tensor` and runs one backward
// pass; with accuracy_check it runs 10 iterations and validates results.
void benchmark_eager_scale(const EagerTensor& tensor,
                           bool accuracy_check = false);
/* ---- Eager MatMul ---- */
/*
void benchmark_eager_matmul(const EagerTensor& X, const EagerTensor& Y,
                            bool accuracy_check = false);
void benchmark_eager_mlp(const EagerTensor& X,
                         const std::vector<EagerTensor>& Ws,
                         const std::vector<EagerTensor>& Bs,
                         bool accuracy_check = false);
*/
// Chained matmul_v2 benchmark through the intermediate eager API; with
// accuracy_check it runs 2 iterations and validates results and gradients.
void benchmark_eager_intermediate_matmul(const EagerTensor& X,
                                         const EagerTensor& Y,
                                         bool accuracy_check = false);
// MLP benchmark (MLP_NUM_LINEAR layers + reduce_sum) through the
// intermediate eager API; validated against compute_mlp_expected_results().
void benchmark_eager_intermediate_mlp(const EagerTensor& X,
                                      const std::vector<EagerTensor>& Ws,
                                      const std::vector<EagerTensor>& Bs,
                                      bool accuracy_check = false);
}
// namespace egr
namespace
paddle
{
namespace
imperative
{
/* ---- Fluid Scale ---- */
// TODO(jiabin): Change this and remove nolint
// Repeatedly traces the fluid "scale" op and runs backward; with
// accuracy_check it runs 10 iterations and validates results.
void benchmark_fluid_scale(
    const std::shared_ptr<imperative::VarBase>& X,  // NOLINT
    const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MatMul ---- */
// Chained fluid matmul_v2 benchmark; with accuracy_check it runs 2
// iterations and validates results and gradients.
void benchmark_fluid_matmul(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::shared_ptr<imperative::VarBase>& Y,  // NOLINT
    const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MLP ---- */
// Fluid MLP benchmark (MLP_NUM_LINEAR layers + reduce_sum); validated
// against egr::compute_mlp_expected_results().
void benchmark_fluid_mlp(
    const std::shared_ptr<imperative::VarBase>& X,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
    const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
    const paddle::platform::Place& place, bool accuracy_check = false);
}
// namespace imperative
}
// namespace paddle
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
浏览文件 @
7df301f2
...
...
@@ -4,3 +4,4 @@ cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eage
# Task-level eager-mode tests. Each target links the shared eager/fluid
# dependency lists plus the scale op's eager kernel and backward node.
cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
# Joint forward+backward tests added by this change.
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
0 → 100644
浏览文件 @
7df301f2
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/tensor_meta.h"
#include "paddle/fluid/eager/tests/test_utils.h"
// TODO(jiabin): remove nolint here!!!
using
namespace
egr
;
// NOLINT
namespace
eager_test
{
// Gradient hook used by the tests below: returns a new tensor whose every
// element is the input's element plus 5.0. The input tensor is not modified
// in place; a freshly allocated DenseTensor with the same meta is returned.
// NOTE(review): assumes `t` holds float data — mutable_data<float>() would
// reinterpret other dtypes; confirm callers only pass FLOAT32 tensors.
egr::EagerTensor hook_function(const egr::EagerTensor& t) {
  auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());

  // Build an output tensor with identical dtype/dims/layout, backed by a
  // fresh allocation on the input's place.
  auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
                                        t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size =
      paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<pten::DenseTensor>(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes_size), 0),
      std::move(ret_meta));

  // Elementwise: ret[i] = t[i] + 5.0.
  // NOTE(review): raw pointer loop — only valid for CPU-accessible memory;
  // the tests use this hook on CPU places.
  float* t_ptr = t_dense->mutable_data<float>();
  float* ret_ptr = ret_dense->mutable_data<float>();
  for (int i = 0; i < ret_dense->numel(); i++) {
    ret_ptr[i] = t_ptr[i] + 5.0;
  }

  // Wrap the new DenseTensor in an EagerTensor.
  auto ret_impl = std::dynamic_pointer_cast<pten::TensorBase>(ret_dense);
  egr::EagerTensor ret = egr::EagerTensor();
  ret.set_impl(ret_impl);

  return ret;
}
// Forward + backward through a single scale node on CPU:
// out = 2 * x + 3 with x filled with 5.0 -> out == 13.0, dx == 2.0.
TEST(FwdBwdJoint, SingleNode) {
  InitEnv(paddle::platform::CPUPlace());

  // Prepare a leaf input filled with 5.0 and retain its gradient.
  paddle::framework::DDim dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor leaf = CreateTensorWithValue(
      dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(leaf);

  // Forward through one scale node.
  float scale_factor = 2.0;
  float bias_term = 3.0;
  egr::EagerTensor out =
      egr::scale(leaf, scale_factor, bias_term,
                 /*bias_after_scale=*/true, /*trace_backward=*/true);

  // 5 * 2 + 3 == 13.
  CompareTensorWithValue<float>(out, 13.0);

  // Backward from the single output.
  std::vector<egr::EagerTensor> backward_roots = {out};
  RunBackward(backward_roots, {});

  VLOG(7) << "Target Grad is: "
          << std::static_pointer_cast<pten::DenseTensor>(
                 EagerUtils::unsafe_autograd_meta(leaf)->Grad().impl())
                 ->data<float>()[0];

  // d(out)/d(x) == scale == 2.
  CompareGradTensorWithValue<float>(leaf, 2.0);
}
/*
inp
|
Node0
|
Node1
|
out
*/
// Forward + backward through two scale nodes in sequence:
// out0 = 2x + 3, out1 = 5*out0 + 10; with x == 5 this gives 13 and 75,
// and d(out1)/dx = 2 * 5 = 10.
TEST(FwdBwdJoint, LinearNodes) {
  InitEnv(paddle::platform::CPUPlace());

  // Prepare a leaf input filled with 5.0 and retain its gradient.
  paddle::framework::DDim dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor leaf = CreateTensorWithValue(
      dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(leaf);

  // Forward node 0: x -> 2x + 3.
  float s0 = 2.0;
  float b0 = 3.0;
  egr::EagerTensor mid = egr::scale(leaf, s0, b0,
                                    /*bias_after_scale=*/true,
                                    /*trace_backward=*/true);

  // Forward node 1: x -> 5x + 10.
  float s1 = 5.0;
  float b1 = 10.0;
  egr::EagerTensor last = egr::scale(mid, s1, b1,
                                     /*bias_after_scale=*/true,
                                     /*trace_backward=*/true);

  // 5*2+3 == 13; 13*5+10 == 75.
  CompareTensorWithValue<float>(mid, 13.0);
  CompareTensorWithValue<float>(last, 75.0);

  // Backward from the final output.
  std::vector<egr::EagerTensor> backward_roots = {last};
  RunBackward(backward_roots, {});

  // Chain rule: 2 * 5 == 10.
  CompareGradTensorWithValue<float>(leaf, 10.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// Forward + backward through a branched graph: Node0 feeds both Node1 and
// Node2; gradients from both branches must sum at the leaf (2*5 + 2*10 = 30).
TEST(FwdBwdJoint, BranchedNodes) {
  InitEnv(paddle::platform::CPUPlace());
  // 1. Prepare Input: leaf filled with 5.0, gradient retained.
  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor tensor = CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(tensor);
  // 2. Run Forward
  // Run Forward Node 0: x -> 2x + 3.
  float scale0 = 2.0;
  float bias0 = 3.0;
  egr::EagerTensor out0 = egr::scale(tensor, scale0, bias0,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Run Forward Node 1 (branch A): x -> 5x + 10.
  float scale1 = 5.0;
  float bias1 = 10.0;
  egr::EagerTensor out1 = egr::scale(out0, scale1, bias1,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Run Forward Node 2 (branch B): x -> 10x + 20.
  float scale2 = 10.0;
  float bias2 = 20.0;
  egr::EagerTensor out2 = egr::scale(out0, scale2, bias2,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Examine Forward Output 0: 5*2+3 == 13.
  CompareTensorWithValue<float>(out0, 13.0);
  // Examine Forward Output 1: 13*5+10 == 75.
  CompareTensorWithValue<float>(out1, 75.0);
  // Examine Forward Output 2: 13*10+20 == 150.
  // NOTE(review): only the first 20 elements are checked here via a raw
  // pointer, unlike the other outputs — presumably a shortcut; see the CUDA
  // variant which uses CompareTensorWithValue instead.
  {
    auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out2.impl());
    float* ptr = dense_out->mutable_data<float>();
    for (int i = 0; i < 20; i++) {
      PADDLE_ENFORCE(ptr[i] == 150.0,
                     paddle::platform::errors::Fatal(
                         "Detected numerical Error, Expected %f but got %f",
                         150.0, ptr[i]));
    }
  }
  // 3. Run Backward from both branch outputs simultaneously.
  std::vector<egr::EagerTensor> outs = {out1, out2};
  RunBackward(outs, {});
  // Examine Backward Grad: branch sums 2*5 + 2*10 == 30.
  CompareGradTensorWithValue<float>(tensor, 30.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// Same branched graph as BranchedNodes, but with a "+5" gradient hook
// registered on every intermediate output; checks that hooks are applied to
// the incoming gradient before it propagates further.
TEST(FwdBwdJoint, GradientHook) {
  InitEnv(paddle::platform::CPUPlace());
  // 1. Prepare Input: leaf filled with 5.0, gradient retained.
  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor tensor = CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(tensor);

  // hook_function returns input + 5.0 elementwise.
  std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
      &hook_function;

  // 2. Run Forward
  // Run Forward Node 0: x -> 2x + 3.
  float scale0 = 2.0;
  float bias0 = 3.0;
  egr::EagerTensor out0 = egr::scale(tensor, scale0, bias0,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  RetainGradForTensor(out0);                  // hook: +5
  RegisterGradientHookForTensor(out0, hook);  // hook: +5
  // Run Forward Node 1 (branch A): x -> 5x + 10.
  float scale1 = 5.0;
  float bias1 = 10.0;
  egr::EagerTensor out1 = egr::scale(out0, scale1, bias1,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  RetainGradForTensor(out1);                  // hook: +5
  RegisterGradientHookForTensor(out1, hook);  // hook: +5
  // Run Forward Node 2 (branch B): x -> 10x + 20.
  float scale2 = 10.0;
  float bias2 = 20.0;
  egr::EagerTensor out2 = egr::scale(out0, scale2, bias2,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  RetainGradForTensor(out2);                  // hook: +5
  RegisterGradientHookForTensor(out2, hook);  // hook: +5

  // 3. Run Backward from both branch outputs.
  std::vector<egr::EagerTensor> outs = {out1, out2};
  RunBackward(outs, {});

  // Examine Backward Grad.
  // leaf grad: out0 receives (1+5)*5 + (1+5)*10 = 90; after out0's own hook,
  // (90+5)*2 = 190 reaches the leaf.
  CompareGradTensorWithValue<float>(tensor, 190.0);
  // out0 grad (retained before out0's hook): (1+5)*5 + (1+5)*10 = 90.
  CompareGradTensorWithValue<float>(out0, 90.0);
  // out1 grad: the backward seed, retained as 1.0.
  CompareGradTensorWithValue<float>(out1, 1.0);
  // out2 grad: the backward seed, retained as 1.0.
  CompareGradTensorWithValue<float>(out2, 1.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// Runs backward twice over the same branched graph; leaf gradients must
// accumulate across runs: 30 after the first pass, 60 after the second.
TEST(FwdBwdJoint, CrossBatchAccumulation) {
  InitEnv(paddle::platform::CPUPlace());

  // Leaf input filled with 5.0, gradient retained.
  paddle::framework::DDim dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor leaf = CreateTensorWithValue(
      dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(leaf);

  // Node 0: x -> 2x + 3.
  float s0 = 2.0;
  float b0 = 3.0;
  egr::EagerTensor shared_mid = egr::scale(leaf, s0, b0,
                                           /*bias_after_scale=*/true,
                                           /*trace_backward=*/true);

  // Node 1 (branch A): x -> 5x + 10.
  float s1 = 5.0;
  float b1 = 10.0;
  egr::EagerTensor branch_a = egr::scale(shared_mid, s1, b1,
                                         /*bias_after_scale=*/true,
                                         /*trace_backward=*/true);

  // Node 2 (branch B): x -> 10x + 20.
  float s2 = 10.0;
  float b2 = 20.0;
  egr::EagerTensor branch_b = egr::scale(shared_mid, s2, b2,
                                         /*bias_after_scale=*/true,
                                         /*trace_backward=*/true);

  std::vector<egr::EagerTensor> backward_roots = {branch_a, branch_b};

  // First backward: 2*5 + 2*10 == 30.
  RunBackward(backward_roots, {});
  CompareGradTensorWithValue<float>(leaf, 30.0);

  // Second backward over the same graph: grads accumulate to 60.
  RunBackward(backward_roots, {});
  CompareGradTensorWithValue<float>(leaf, 60.0);
}
/* ---------------------------------------------------- */
/* ---------------------- CUDA Tests ------------------ */
/* ---------------------------------------------------- */
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// CUDA variant of SingleNode: one scale node (2x + 3) on the GPU,
// forward result 13.0 and leaf gradient 2.0.
TEST(FwdBwdJoint, SingleNodeCUDA) {
  InitEnv(paddle::platform::CUDAPlace());

  // Leaf input on the GPU filled with 5.0, gradient retained.
  paddle::framework::DDim dims = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor leaf = CreateTensorWithValue(
      dims, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(leaf);

  // Forward through one scale node.
  float scale_factor = 2.0;
  float bias_term = 3.0;
  egr::EagerTensor out =
      egr::scale(leaf, scale_factor, bias_term,
                 /*bias_after_scale=*/true, /*trace_backward=*/true);

  // 5 * 2 + 3 == 13.
  CompareTensorWithValue<float>(out, 13.0);

  // Backward from the single output.
  std::vector<egr::EagerTensor> backward_roots = {out};
  RunBackward(backward_roots, {});

  // d(out)/d(x) == scale == 2.
  CompareGradTensorWithValue<float>(leaf, 2.0);
}
/*
inp
|
Node0
____|____
| |
Node1 Node2
| |
out1 out2
*/
// CUDA variant of BranchedNodes: Node0 feeds Node1 and Node2 on the GPU;
// leaf gradient is the branch sum 2*5 + 2*10 == 30.
TEST(FwdBwdJoint, BranchedNodesCUDA) {
  InitEnv(paddle::platform::CUDAPlace());
  // 1. Prepare Input: leaf on the GPU filled with 5.0, gradient retained.
  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
  egr::EagerTensor tensor = CreateTensorWithValue(
      ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
  RetainGradForTensor(tensor);
  // 2. Run Forward
  // Run Forward Node 0: x -> 2x + 3.
  float scale0 = 2.0;
  float bias0 = 3.0;
  egr::EagerTensor out0 = egr::scale(tensor, scale0, bias0,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Run Forward Node 1 (branch A): x -> 5x + 10.
  float scale1 = 5.0;
  float bias1 = 10.0;
  egr::EagerTensor out1 = egr::scale(out0, scale1, bias1,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Run Forward Node 2 (branch B): x -> 10x + 20.
  float scale2 = 10.0;
  float bias2 = 20.0;
  egr::EagerTensor out2 = egr::scale(out0, scale2, bias2,
                                     true /*bias_after_scale*/,
                                     true /*trace_backward*/);
  // Examine Forward Output 0: 5*2+3 == 13.
  CompareTensorWithValue<float>(out0, 13.0);
  // Examine Forward Output 1: 13*5+10 == 75.
  CompareTensorWithValue<float>(out1, 75.0);
  // Examine Forward Output 2: 13*10+20 == 150.
  CompareTensorWithValue<float>(out2, 150.0);
  // TODO(jiabin): fix this with add functor
  // 3. Run Backward from both branch outputs.
  std::vector<egr::EagerTensor> outs = {out1, out2};
  RunBackward(outs, {});
  // Examine Backward Grad: 2*5 + 2*10 == 30.
  CompareGradTensorWithValue<float>(tensor, 30.0);
}
#endif
}
// namespace eager_test
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录