Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
6d830f6c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6d830f6c
编写于
3月 11, 2022
作者:
Z
Zhanlue Yang
提交者:
GitHub
3月 11, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added Final State Matmul_v2 to C++ performance test (#40391)
上级
47459e98
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
114 additions
and
11 deletions
+114
-11
paddle/fluid/eager/CMakeLists.txt
paddle/fluid/eager/CMakeLists.txt
+1
-1
paddle/fluid/eager/autograd_meta.h
paddle/fluid/eager/autograd_meta.h
+1
-2
paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
...luid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+41
-0
paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
...uid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+44
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
...le/fluid/eager/tests/performance_tests/benchmark_utils.cc
+24
-0
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
+3
-8
未找到文件。
paddle/fluid/eager/CMakeLists.txt
浏览文件 @
6d830f6c
set
(
eager_deps phi_api hook_utils tensor_utils utils global_utils backward phi_tensor tracer layer autograd_meta grad_node_info grad_tensor_holder accumulation_node
)
set
(
fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy
)
set
(
generated_deps dygraph_function dygraph_node
)
set
(
generated_deps
final_dygraph_function final_dygraph_node
dygraph_function dygraph_node
)
if
(
NOT
((
NOT WITH_PYTHON
)
AND ON_INFER
))
message
(
"Performing Eager Dygraph Auto Code Generation"
)
...
...
paddle/fluid/eager/autograd_meta.h
浏览文件 @
6d830f6c
...
...
@@ -145,8 +145,7 @@ class AutogradMeta : public AbstractAutogradMeta {
private:
// TODO(jiabin) :Should we use pointer instead of object?
std
::
shared_ptr
<
paddle
::
experimental
::
Tensor
>
grad_
{
std
::
make_shared
<
paddle
::
experimental
::
Tensor
>
(
egr
::
Controller
::
Instance
().
GenerateUniqueName
(
"@grad"
))};
std
::
make_shared
<
paddle
::
experimental
::
Tensor
>
()};
// GradNodeBase is base class of all grad op which is a
// wrapper for grad op. This class will make grad op easy
...
...
paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
浏览文件 @
6d830f6c
...
...
@@ -80,6 +80,47 @@ TEST(Benchmark, EagerScaleCPU) {
}
}
// Exercises benchmark_eager_matmul on CPU in two passes: an "Accuracy" pass
// that asks the helper to verify results, followed by a "Performance" pass
// whose wall-clock duration is printed to stdout.
TEST(Benchmark, EagerMatmulCPU) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  using BenchClock = std::chrono::high_resolution_clock;

  for (const std::string& mode : {"Accuracy", "Performance"}) {
    // Inputs are rebuilt for every mode so each pass starts from fresh
    // tensors: a 2x2 tensor created with value 1.0 and another with 2.0.
    paddle::framework::DDim lhs_dims = phi::make_ddim({2, 2});
    paddle::experimental::Tensor lhs = CreateTensorWithValue(
        lhs_dims, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
        phi::DataLayout::NCHW, 1.0, true);
    RetainGradForTensor(lhs);

    paddle::framework::DDim rhs_dims = phi::make_ddim({2, 2});
    paddle::experimental::Tensor rhs = CreateTensorWithValue(
        rhs_dims, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
        phi::DataLayout::NCHW, 2.0, true);
    RetainGradForTensor(rhs);

    if (mode == "Accuracy") {
      benchmark_eager_matmul(lhs, rhs, true /* accuracy_check */);
      continue;
    }

    if (mode != "Performance") {
      PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode"));
    }

    // Timed section; the optional gperftools profiler calls sit inside the
    // measured interval, exactly around the benchmark call.
    const auto started_at = BenchClock::now();
#ifdef WITH_GPERFTOOLS
    ProfilerStart("eager_matmul_cpu.out");
#endif
    benchmark_eager_matmul(lhs, rhs);
#ifdef WITH_GPERFTOOLS
    ProfilerStop();
#endif
    const auto finished_at = BenchClock::now();

    const std::chrono::duration<double, std::milli> elapsed =
        finished_at - started_at;
    std::cout << "Duration: " << elapsed.count() << " ms" << std::endl;
  }
}
TEST
(
Benchmark
,
EagerIntermediateMatmulCPU
)
{
// Prepare Device Contexts
eager_test
::
InitEnv
(
paddle
::
platform
::
CPUPlace
());
...
...
paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
浏览文件 @
6d830f6c
...
...
@@ -82,6 +82,50 @@ TEST(Benchmark, EagerScaleCUDA) {
}
}
// Exercises benchmark_eager_matmul on CUDA in three passes: "Accuracy"
// (helper verifies results), "WarmUp" (an untimed run), and "Performance"
// (a timed run whose wall-clock duration is printed to stdout).
TEST(Benchmark, EagerMatmulCUDA) {
  paddle::platform::CUDAPlace place;
  eager_test::InitEnv(place);

  using BenchClock = std::chrono::high_resolution_clock;

  for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
    // Inputs are rebuilt for every mode so each pass starts from fresh
    // tensors: a 2x2 tensor created with value 1.0 and another with 2.0.
    paddle::framework::DDim lhs_dims = phi::make_ddim({2, 2});
    paddle::experimental::Tensor lhs = CreateTensorWithValue(
        lhs_dims, paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
        phi::DataLayout::NCHW, 1.0, true);
    RetainGradForTensor(lhs);

    paddle::framework::DDim rhs_dims = phi::make_ddim({2, 2});
    paddle::experimental::Tensor rhs = CreateTensorWithValue(
        rhs_dims, paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
        phi::DataLayout::NCHW, 2.0, true);
    RetainGradForTensor(rhs);

    if (mode == "Accuracy") {
      benchmark_eager_matmul(lhs, rhs, true /* accuracy_check */);
      continue;
    }

    if (mode == "WarmUp") {
      benchmark_eager_matmul(lhs, rhs);
      continue;
    }

    if (mode != "Performance") {
      PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode"));
    }

    // Timed section; the optional gperftools profiler calls sit inside the
    // measured interval, exactly around the benchmark call.
    const auto started_at = BenchClock::now();
#ifdef WITH_GPERFTOOLS
    ProfilerStart("eager_matmul_cuda.out");
#endif
    benchmark_eager_matmul(lhs, rhs);
#ifdef WITH_GPERFTOOLS
    ProfilerStop();
#endif
    const auto finished_at = BenchClock::now();

    const std::chrono::duration<double, std::milli> elapsed =
        finished_at - started_at;
    std::cout << "Duration: " << elapsed.count() << " ms" << std::endl;
  }
}
TEST
(
Benchmark
,
EagerIntermediateMatmulCUDA
)
{
paddle
::
platform
::
CUDAPlace
place
;
eager_test
::
InitEnv
(
place
);
...
...
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
浏览文件 @
6d830f6c
...
...
@@ -28,6 +28,7 @@
#include "paddle/fluid/eager/utils.h"
// Eager Generated
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
// Fluid
...
...
@@ -67,6 +68,29 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
}
}
// Repeatedly feeds the running result back into
// matmul_final_state_dygraph_function together with Y (both boolean
// arguments passed as false), then runs the backward pass on the final
// result.
//
// X              - initial left-hand operand of the chain.
// Y              - right-hand operand reused on every iteration.
// accuracy_check - when true, only 2 iterations are run and the final
//                  tensor plus the gradients of X and Y are compared against
//                  16; otherwise max_num_benchmark_runs iterations are run
//                  and nothing is compared.
void benchmark_eager_matmul(const paddle::experimental::Tensor& X,
                            const paddle::experimental::Tensor& Y,
                            bool accuracy_check) {
  size_t iteration_count = accuracy_check ? 2 : max_num_benchmark_runs;

  paddle::experimental::Tensor running_product = X;
  for (size_t step = 0; step < iteration_count; ++step) {
    running_product = matmul_final_state_dygraph_function(running_product, Y,
                                                          false, false);
  }

  std::vector<paddle::experimental::Tensor> backward_roots = {running_product};
  RunBackward(backward_roots, {});

  if (!accuracy_check) {
    return;
  }

  // Expected values below are tied to the 2-iteration accuracy configuration.
  // Forward result of the chain:
  eager_test::CompareTensorWithValue<float>(running_product, 16);
  // Gradients retained on both inputs:
  eager_test::CompareGradTensorWithValue<float>(X, 16);
  eager_test::CompareGradTensorWithValue<float>(Y, 16);
}
/* ----------------------------------- */
/* ---- Eager Intermediate Matmul ---- */
/* ----------------------------------- */
...
...
paddle/fluid/eager/tests/performance_tests/benchmark_utils.h
浏览文件 @
6d830f6c
...
...
@@ -51,15 +51,10 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
bool
accuracy_check
=
false
);
/* ---- Eager MatMul ---- */
/*
void benchmark_eager_matmul(const paddle::experimental::Tensor& X, const
paddle::experimental::Tensor& Y,
void
benchmark_eager_matmul
(
const
paddle
::
experimental
::
Tensor
&
X
,
const
paddle
::
experimental
::
Tensor
&
Y
,
bool
accuracy_check
=
false
);
void benchmark_eager_mlp(const paddle::experimental::Tensor& X,
const std::vector<paddle::experimental::Tensor>& Ws,
const std::vector<paddle::experimental::Tensor>& Bs,
bool accuracy_check = false);
*/
// Declaration of the "Eager Intermediate Matmul" benchmark entry point.
// Takes two tensors by const reference; accuracy_check defaults to false.
// NOTE(review): presumably mirrors benchmark_eager_matmul (few runs plus
// value checks when accuracy_check is true) -- the definition is not visible
// here, confirm against benchmark_utils.cc.
void
benchmark_eager_intermediate_matmul
(
const
paddle
::
experimental
::
Tensor
&
X
,
const
paddle
::
experimental
::
Tensor
&
Y
,
bool
accuracy_check
=
false
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录