Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
eafd4280
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
eafd4280
编写于
7月 27, 2022
作者:
W
Wang Bojun
提交者:
GitHub
7月 27, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Phi average accumulates migration (#44554)
* move average_accumulates op to phi kernel
上级
122fff46
变更
13
显示空白变更内容
内联
并排
Showing
13 changed file
with
510 addition
and
299 deletion
+510
-299
paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
...auto_code_generator/final_state_generator/python_c_gen.py
+2
-0
paddle/fluid/operators/average_accumulates_op.cc
paddle/fluid/operators/average_accumulates_op.cc
+11
-89
paddle/fluid/operators/average_accumulates_op.cu
paddle/fluid/operators/average_accumulates_op.cu
+0
-90
paddle/fluid/operators/average_accumulates_op.h
paddle/fluid/operators/average_accumulates_op.h
+0
-119
paddle/phi/api/yaml/legacy_api.yaml
paddle/phi/api/yaml/legacy_api.yaml
+11
-0
paddle/phi/infermeta/multiary.cc
paddle/phi/infermeta/multiary.cc
+62
-0
paddle/phi/infermeta/multiary.h
paddle/phi/infermeta/multiary.h
+17
-0
paddle/phi/kernels/average_accumulates_kernel.h
paddle/phi/kernels/average_accumulates_kernel.h
+57
-0
paddle/phi/kernels/cpu/average_accumulates_kernel.cc
paddle/phi/kernels/cpu/average_accumulates_kernel.cc
+56
-0
paddle/phi/kernels/gpu/average_accumulates_kernel.cu
paddle/phi/kernels/gpu/average_accumulates_kernel.cu
+100
-0
paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
+146
-0
paddle/phi/ops/compat/average_accumulates_sig.cc
paddle/phi/ops/compat/average_accumulates_sig.cc
+39
-0
python/paddle/incubate/optimizer/modelaverage.py
python/paddle/incubate/optimizer/modelaverage.py
+9
-1
未找到文件。
paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
浏览文件 @
eafd4280
...
...
@@ -57,6 +57,8 @@ no_amp_list = [
'adam'
,
'adamw_'
,
'adamw'
,
'average_accumulates'
,
'average_accumulates_'
,
'decayed_adagrad_'
,
'decayed_adagrad'
,
'dgc_momentum_'
,
...
...
paddle/fluid/operators/average_accumulates_op.cc
浏览文件 @
eafd4280
...
...
@@ -12,99 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/average_accumulates_op.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/infermeta/multiary.h"
namespace
paddle
{
namespace
operators
{
// CPU specialization of GetAccumulators.
//
// Looks up the three counter inputs of the op by name and copies element 0 of
// each (read as int64_t) into the corresponding caller-provided location.
template <>
void GetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
                                      int64_t* num_updates,
                                      int64_t* num_accumulates,
                                      int64_t* old_num_accumulates) {
  const Tensor* old_accumulates_in =
      ctx.Input<Tensor>("in_old_num_accumulates");
  const Tensor* accumulates_in = ctx.Input<Tensor>("in_num_accumulates");
  const Tensor* updates_in = ctx.Input<Tensor>("in_num_updates");

  // The tensors are dereferenced directly on the host.
  *old_num_accumulates = *old_accumulates_in->data<int64_t>();
  *num_accumulates = *accumulates_in->data<int64_t>();
  *num_updates = *updates_in->data<int64_t>();
}
// CPU specialization of SetAccumulators.
//
// Looks up the three counter outputs of the op by name and stores the given
// values into element 0 of each (written as int64_t).
template <>
void SetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
                                      int64_t num_updates,
                                      int64_t num_accumulates,
                                      int64_t old_num_accumulates) {
  Tensor* old_accumulates_out = ctx.Output<Tensor>("out_old_num_accumulates");
  Tensor* accumulates_out = ctx.Output<Tensor>("out_num_accumulates");
  Tensor* updates_out = ctx.Output<Tensor>("out_num_updates");

  // The tensors are written directly from the host.
  *old_accumulates_out->data<int64_t>() = old_num_accumulates;
  *accumulates_out->data<int64_t>() = num_accumulates;
  *updates_out->data<int64_t>() = num_updates;
}
class
AverageAccumulatesOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"param"
),
"Input"
,
"param"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_sum_1"
),
"Input"
,
"in_sum_1"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_sum_2"
),
"Input"
,
"in_sum_2"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_sum_3"
),
"Input"
,
"in_sum_3"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_num_accumulates"
),
"Input"
,
"in_num_accumulates"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_old_num_accumulates"
),
"Input"
,
"in_old_num_accumulates"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"in_num_updates"
),
"Input"
,
"in_num_updates"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_sum_1"
),
"Output"
,
"out_sum_1"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_sum_2"
),
"Output"
,
"out_sum_2"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_sum_3"
),
"Output"
,
"out_sum_3"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_num_accumulates"
),
"Output"
,
"out_num_accumulates"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_old_num_accumulates"
),
"Output"
,
"out_old_num_accumulates"
,
"AverageAccumulates"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"out_num_updates"
),
"Output"
,
"out_num_updates"
,
"AverageAccumulates"
);
auto
in_dim
=
ctx
->
GetInputDim
(
"param"
);
ctx
->
SetOutputDim
(
"out_sum_1"
,
in_dim
);
ctx
->
SetOutputDim
(
"out_sum_2"
,
in_dim
);
ctx
->
SetOutputDim
(
"out_sum_3"
,
in_dim
);
ctx
->
SetOutputDim
(
"out_num_accumulates"
,
{
1
});
ctx
->
SetOutputDim
(
"out_old_num_accumulates"
,
{
1
});
ctx
->
SetOutputDim
(
"out_num_updates"
,
{
1
});
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -209,12 +129,14 @@ And for a mini-batch in training, accumulators were computed as below steps:
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
DECLARE_INFER_SHAPE_FUNCTOR
(
average_accumulates
,
AverageAccumulatesInferShapeFunctor
,
PD_INFER_META
(
phi
::
AverageAccumulatesInferMeta
));
REGISTER_OPERATOR
(
average_accumulates
,
ops
::
AverageAccumulatesOp
,
ops
::
AverageAccumulatesOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL
(
average_accumulates
,
ops
::
AverageAccumulatesKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
AverageAccumulatesKernel
<
phi
::
CPUContext
,
double
>
);
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
,
AverageAccumulatesInferShapeFunctor
);
paddle/fluid/operators/average_accumulates_op.cu
已删除
100644 → 0
浏览文件 @
122fff46
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/average_accumulates_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace
paddle
{
namespace
operators
{
// CUDA specialization of GetAccumulators.
//
// Copies one int64_t from each of the three counter inputs into the
// caller-provided host locations. Every copy is issued through memory::Copy on
// the CUDA device context's stream, using the place of
// "in_old_num_accumulates" as the source place for all three tensors.
template <>
void GetAccumulators<paddle::platform::CUDADeviceContext>(
    const framework::ExecutionContext& ctx,
    int64_t* num_updates_,
    int64_t* num_accumulates_,
    int64_t* old_num_accumulates_) {
  const auto* old_accumulates_in = ctx.Input<Tensor>("in_old_num_accumulates");
  const auto* accumulates_in = ctx.Input<Tensor>("in_num_accumulates");
  const auto* updates_in = ctx.Input<Tensor>("in_num_updates");

  auto copy_stream = ctx.cuda_device_context().stream();
  auto src_place = old_accumulates_in->place();
  const auto counter_bytes = sizeof(int64_t);

  memory::Copy(platform::CPUPlace(),
               old_num_accumulates_,
               src_place,
               old_accumulates_in->data<int64_t>(),
               counter_bytes,
               copy_stream);
  memory::Copy(platform::CPUPlace(),
               num_accumulates_,
               src_place,
               accumulates_in->data<int64_t>(),
               counter_bytes,
               copy_stream);
  memory::Copy(platform::CPUPlace(),
               num_updates_,
               src_place,
               updates_in->data<int64_t>(),
               counter_bytes,
               copy_stream);
}
// CUDA specialization of SetAccumulators.
//
// Copies the three host-side counter values into element 0 of the matching
// output tensors. Every copy is issued through memory::Copy on the CUDA device
// context's stream, using the place of "out_old_num_accumulates" as the
// destination place for all three tensors.
template <>
void SetAccumulators<paddle::platform::CUDADeviceContext>(
    const framework::ExecutionContext& ctx,
    int64_t num_updates_,
    int64_t num_accumulates_,
    int64_t old_num_accumulates_) {
  auto copy_stream = ctx.cuda_device_context().stream();

  auto* old_accumulates_out = ctx.Output<Tensor>("out_old_num_accumulates");
  auto* accumulates_out = ctx.Output<Tensor>("out_num_accumulates");
  auto* updates_out = ctx.Output<Tensor>("out_num_updates");

  auto dst_place = old_accumulates_out->place();
  const auto counter_bytes = sizeof(int64_t);

  memory::Copy(dst_place,
               old_accumulates_out->data<int64_t>(),
               platform::CPUPlace(),
               &old_num_accumulates_,
               counter_bytes,
               copy_stream);
  memory::Copy(dst_place,
               accumulates_out->data<int64_t>(),
               platform::CPUPlace(),
               &num_accumulates_,
               counter_bytes,
               copy_stream);
  memory::Copy(dst_place,
               updates_out->data<int64_t>(),
               platform::CPUPlace(),
               &num_updates_,
               counter_bytes,
               copy_stream);
}
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
// Registers the CUDA kernels of the average_accumulates op:
// AverageAccumulatesKernel instantiated for CUDADeviceContext with float and
// with double element types.
REGISTER_OP_CUDA_KERNEL
(
average_accumulates
,
ops
::
AverageAccumulatesKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
AverageAccumulatesKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/average_accumulates_op.h
已删除
100644 → 0
浏览文件 @
122fff46
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
// Reads the op's three int64 counters into the caller-provided locations.
//
// Only declared here; the definition is supplied as an explicit
// specialization for each supported DeviceContext.
template <typename DeviceContext>
void GetAccumulators(const framework::ExecutionContext& ctx,
                     int64_t* num_updates,
                     int64_t* num_accumulates,
                     int64_t* old_num_accumulates);
// Writes the three int64 counter values to the op's counter outputs.
//
// Only declared here; the definition is supplied as an explicit
// specialization for each supported DeviceContext.
template <typename DeviceContext>
void SetAccumulators(const framework::ExecutionContext& ctx,
                     int64_t num_updates,
                     int64_t num_accumulates,
                     int64_t old_num_accumulates);
// Kernel of the average_accumulates op.
//
// On every call it:
//   1. loads the counters (num_updates, num_accumulates, old_num_accumulates)
//      through GetAccumulators and increments the first two;
//   2. writes out_sum_1 = in_sum_1 + param and forwards in_sum_2 / in_sum_3 to
//      out_sum_2 / out_sum_3;
//   3. every kMaxNumAccumulates updates, writes in_sum_2 + in_sum_1 to
//      out_sum_2 and zeroes out_sum_1;
//   4. once the accumulation window is reached, writes in_sum_1 + in_sum_2 to
//      out_sum_3, zeroes out_sum_1 and out_sum_2, saves num_accumulates into
//      old_num_accumulates and resets num_accumulates to 0;
//   5. stores the counters through SetAccumulators.
template <typename DeviceContext, typename T>
class AverageAccumulatesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Period (in updates) of the sum_1 -> sum_2 hand-over; it is used to
    // avoid loss of precision.
    static const int64_t kMaxNumAccumulates = 16384;

    // Host copies of the counters.
    int64_t updates = 0;
    int64_t accumulates = 0;
    int64_t old_accumulates = 0;
    GetAccumulators<DeviceContext>(
        ctx, &updates, &accumulates, &old_accumulates);

    // Window attributes.
    float window_ratio = ctx.Attr<float>("average_window");
    int64_t window_max = ctx.Attr<int64_t>("max_average_window");
    int64_t window_min = ctx.Attr<int64_t>("min_average_window");
    PADDLE_ENFORCE_LE(
        window_min,
        window_max,
        platform::errors::InvalidArgument(
            "The min_average_window > "
            "max_average_window is not right, min_average_window is %ld, "
            "max_average_window is %ld.",
            window_min,
            window_max));

    // Flattened Eigen views of the inputs.
    auto* param = ctx.Input<Tensor>("param");
    auto* in_sum_1 = ctx.Input<Tensor>("in_sum_1");
    auto* in_sum_2 = ctx.Input<Tensor>("in_sum_2");
    auto* in_sum_3 = ctx.Input<Tensor>("in_sum_3");
    auto param_vec = framework::EigenVector<T>::Flatten(*param);
    auto in_1_vec = framework::EigenVector<T>::Flatten(*in_sum_1);
    auto in_2_vec = framework::EigenVector<T>::Flatten(*in_sum_2);
    auto in_3_vec = framework::EigenVector<T>::Flatten(*in_sum_3);

    // Flattened Eigen views of the outputs.
    auto* out_sum_1 = ctx.Output<Tensor>("out_sum_1");
    auto* out_sum_2 = ctx.Output<Tensor>("out_sum_2");
    auto* out_sum_3 = ctx.Output<Tensor>("out_sum_3");
    auto out_1_vec = framework::EigenVector<T>::Flatten(*out_sum_1);
    auto out_2_vec = framework::EigenVector<T>::Flatten(*out_sum_2);
    auto out_3_vec = framework::EigenVector<T>::Flatten(*out_sum_3);

    const auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto& eigen_dev = *dev_ctx.eigen_device();
    phi::funcs::SetConstant<DeviceContext, T> fill;

    ++updates;
    ++accumulates;

    // The order of these assignments is kept as-is: outputs may alias inputs.
    out_1_vec.device(eigen_dev) = in_1_vec + param_vec;
    out_2_vec.device(eigen_dev) = in_2_vec;
    out_3_vec.device(eigen_dev) = in_3_vec;

    if (updates % kMaxNumAccumulates == 0) {
      // Move the sum to a different buffer to avoid loss of precision due to
      // too many sums.
      out_2_vec.device(eigen_dev) = in_2_vec + in_1_vec;
      fill(dev_ctx, out_sum_1, 0.0);
    }

    const bool reached_min = accumulates >= window_min;
    if (reached_min &&
        accumulates >= std::min<int64_t>(window_max, updates * window_ratio)) {
      // Now the average window is too long, discard the old sum.
      out_3_vec.device(eigen_dev) = in_1_vec + in_2_vec;
      fill(dev_ctx, out_sum_1, 0.0);
      fill(dev_ctx, out_sum_2, 0.0);
      old_accumulates = accumulates;
      accumulates = 0;
    }

    // Publish the updated counters.
    SetAccumulators<DeviceContext>(ctx, updates, accumulates, old_accumulates);
  }
};
}
// namespace operators
}
// namespace paddle
paddle/phi/api/yaml/legacy_api.yaml
浏览文件 @
eafd4280
...
...
@@ -264,6 +264,17 @@
kernel
:
func
:
auc
# average_accumulates_
-
api
:
average_accumulates_
args
:
(Tensor param, Tensor in_sum_1, Tensor in_sum_2, Tensor in_sum_3, Tensor in_num_accumulates, Tensor in_old_num_accumulates, Tensor in_num_updates, float average_window, int64_t max_average_window, int64_t min_average_window)
output
:
Tensor(out_sum_1), Tensor(out_sum_2), Tensor(out_sum_3), Tensor(out_num_accumulates), Tensor(out_old_num_accumulates), Tensor(out_num_updates)
infer_meta
:
func
:
AverageAccumulatesInferMeta
kernel
:
func
:
average_accumulates {dense, dense, dense, dense, dense ,dense, dense -> dense, dense, dense, dense, dense, dense}
data_type
:
param
inplace
:
(in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates)
# batch_norm
-
api
:
batch_norm
args
:
(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
...
...
paddle/phi/infermeta/multiary.cc
浏览文件 @
eafd4280
...
...
@@ -434,6 +434,68 @@ void AucInferMeta(const MetaTensor& input,
}
}
void
AverageAccumulatesInferMeta
(
const
MetaTensor
&
param
,
const
MetaTensor
&
in_sum_1
,
const
MetaTensor
&
in_sum_2
,
const
MetaTensor
&
in_sum_3
,
const
MetaTensor
&
in_num_accumulates
,
const
MetaTensor
&
in_old_num_accumulates
,
const
MetaTensor
&
in_num_updates
,
float
average_window
,
int64_t
max_average_window
,
int64_t
min_average_window
,
MetaTensor
*
out_sum_1
,
MetaTensor
*
out_sum_2
,
MetaTensor
*
out_sum_3
,
MetaTensor
*
out_num_accumulates
,
MetaTensor
*
out_old_num_accumulates
,
MetaTensor
*
out_num_updates
)
{
// auto in_dim = param.dims;
PADDLE_ENFORCE_NE
(
out_sum_1
,
nullptr
,
errors
::
NotFound
(
"Output(out_sum_1) of AverageAccumulates should not be null."
));
PADDLE_ENFORCE_NE
(
out_sum_2
,
nullptr
,
errors
::
NotFound
(
"Output(out_sum_2) of AverageAccumulates should not be null."
));
PADDLE_ENFORCE_NE
(
out_sum_3
,
nullptr
,
errors
::
NotFound
(
"Output(out_sum_3) of AverageAccumulates should not be null."
));
PADDLE_ENFORCE_NE
(
out_num_accumulates
,
nullptr
,
errors
::
NotFound
(
"Output(out_num_accumulates) of "
"AverageAccumulates should not be null."
));
PADDLE_ENFORCE_NE
(
out_old_num_accumulates
,
nullptr
,
errors
::
NotFound
(
"Output(out_old_num_accumulates) of "
"AverageAccumulates should not be null."
));
PADDLE_ENFORCE_NE
(
out_num_updates
,
nullptr
,
errors
::
NotFound
(
"Output(out_num_updates) of AverageAccumulates should not be null."
));
out_sum_1
->
set_dims
(
in_sum_1
.
dims
());
out_sum_1
->
set_dtype
(
in_sum_1
.
dtype
());
out_sum_2
->
set_dims
(
in_sum_2
.
dims
());
out_sum_2
->
set_dtype
(
in_sum_2
.
dtype
());
out_sum_3
->
set_dims
(
in_sum_3
.
dims
());
out_sum_3
->
set_dtype
(
in_sum_3
.
dtype
());
out_num_accumulates
->
set_dims
({
1
});
out_num_accumulates
->
set_dtype
(
in_num_accumulates
.
dtype
());
out_old_num_accumulates
->
set_dims
({
1
});
out_old_num_accumulates
->
set_dtype
(
in_old_num_accumulates
.
dtype
());
out_num_updates
->
set_dims
({
1
});
out_num_updates
->
set_dtype
(
in_num_updates
.
dtype
());
}
void
BatchNormInferMeta
(
const
MetaTensor
&
x
,
const
MetaTensor
&
scale
,
const
MetaTensor
&
bias
,
...
...
paddle/phi/infermeta/multiary.h
浏览文件 @
eafd4280
...
...
@@ -134,6 +134,23 @@ void AucInferMeta(const MetaTensor& input,
MetaTensor
*
stat_neg_out
,
MetaConfig
config
=
MetaConfig
());
// Shape/dtype inference for the average_accumulates op: takes the seven input
// tensors' metadata and the three window attributes, and fills in the
// metadata of the six outputs.
void AverageAccumulatesInferMeta(const MetaTensor& param,
                                 const MetaTensor& in_sum_1,
                                 const MetaTensor& in_sum_2,
                                 const MetaTensor& in_sum_3,
                                 const MetaTensor& in_num_accumulates,
                                 const MetaTensor& in_old_num_accumulates,
                                 const MetaTensor& in_num_updates,
                                 float average_window,
                                 int64_t max_average_window,
                                 int64_t min_average_window,
                                 MetaTensor* out_sum_1,
                                 MetaTensor* out_sum_2,
                                 MetaTensor* out_sum_3,
                                 MetaTensor* out_num_accumulates,
                                 MetaTensor* out_old_num_accumulates,
                                 MetaTensor* out_num_updates);
void
BatchNormInferMeta
(
const
MetaTensor
&
x
,
const
MetaTensor
&
scale
,
const
MetaTensor
&
bias
,
...
...
paddle/phi/kernels/average_accumulates_kernel.h
0 → 100644
浏览文件 @
eafd4280
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
// Reads the three int64 counter tensors into the caller-provided locations
// (num_updates, num_accumulates, old_num_accumulates).
//
// Only declared here; each backend provides an explicit specialization for
// its own Context type.
template <typename Context>
void GetAccumulators(const Context& dev_ctx,
                     const DenseTensor& in_num_accumulates,
                     const DenseTensor& in_old_num_accumulates,
                     const DenseTensor& in_num_updates,
                     int64_t* num_updates,
                     int64_t* num_accumulates,
                     int64_t* old_num_accumulates);
// Stores the three int64 counter values into the corresponding output
// tensors.
//
// Only declared here; each backend provides an explicit specialization for
// its own Context type.
template <typename Context>
void SetAccumulators(const Context& dev_ctx,
                     int64_t num_updates,
                     int64_t num_accumulates,
                     int64_t old_num_accumulates,
                     DenseTensor* out_num_accumulates,
                     DenseTensor* out_old_num_accumulates,
                     DenseTensor* out_num_updates);
// Kernel entry point for average_accumulates.
//
// Inputs:  param, the three running sums (in_sum_1..3) and the three counter
//          tensors (in_num_accumulates, in_old_num_accumulates,
//          in_num_updates).
// Attrs:   average_window, max_average_window, min_average_window.
// Outputs: the updated sums (out_sum_1..3) and the updated counters.
template <typename T, typename Context>
void AverageAccumulatesKernel(const Context& dev_ctx,
                              const DenseTensor& param,
                              const DenseTensor& in_sum_1,
                              const DenseTensor& in_sum_2,
                              const DenseTensor& in_sum_3,
                              const DenseTensor& in_num_accumulates,
                              const DenseTensor& in_old_num_accumulates,
                              const DenseTensor& in_num_updates,
                              float average_window,
                              int64_t max_average_window,
                              int64_t min_average_window,
                              DenseTensor* out_sum_1,
                              DenseTensor* out_sum_2,
                              DenseTensor* out_sum_3,
                              DenseTensor* out_num_accumulates,
                              DenseTensor* out_old_num_accumulates,
                              DenseTensor* out_num_updates);
}
// namespace phi
paddle/phi/kernels/cpu/average_accumulates_kernel.cc
0 → 100644
浏览文件 @
eafd4280
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/average_accumulates_kernel.h"
#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
// CPU specialization: the counter tensors are read directly on the host.
// Element 0 of each input (as int64_t) is copied to the matching output
// location; dev_ctx is not used.
template <>
void GetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
                                      const DenseTensor& in_num_accumulates,
                                      const DenseTensor& in_old_num_accumulates,
                                      const DenseTensor& in_num_updates,
                                      int64_t* num_updates,
                                      int64_t* num_accumulates,
                                      int64_t* old_num_accumulates) {
  const int64_t* old_accumulates_src = in_old_num_accumulates.data<int64_t>();
  *old_num_accumulates = *old_accumulates_src;

  const int64_t* accumulates_src = in_num_accumulates.data<int64_t>();
  *num_accumulates = *accumulates_src;

  const int64_t* updates_src = in_num_updates.data<int64_t>();
  *num_updates = *updates_src;
}
// CPU specialization: stores the three counter values into element 0 of the
// matching output tensors.
//
// The outputs are allocated through the device context before being written,
// mirroring the GPU specialization. If an output already has a suitable buffer
// (presumably the in-place case, where the outputs share storage with the
// counter inputs), this should simply yield the existing pointer; otherwise it
// ensures there is memory to write to instead of relying on the caller having
// allocated it.
template <>
void SetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
                                      int64_t num_updates,
                                      int64_t num_accumulates,
                                      int64_t old_num_accumulates,
                                      DenseTensor* out_num_accumulates,
                                      DenseTensor* out_old_num_accumulates,
                                      DenseTensor* out_num_updates) {
  int64_t* out_old_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_old_num_accumulates);
  int64_t* out_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_accumulates);
  int64_t* out_num_updates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_updates);

  out_old_num_accumulates_ptr[0] = old_num_accumulates;
  out_num_accumulates_ptr[0] = num_accumulates;
  out_num_updates_ptr[0] = num_updates;
}
}
// namespace phi
// Registers phi::AverageAccumulatesKernel as the "average_accumulates" kernel
// for the CPU backend, all layouts, with float and double element types.
// The trailing empty braces are the (empty) body required by the macro.
PD_REGISTER_KERNEL
(
average_accumulates
,
CPU
,
ALL_LAYOUT
,
phi
::
AverageAccumulatesKernel
,
float
,
double
)
{}
paddle/phi/kernels/gpu/average_accumulates_kernel.cu
0 → 100644
浏览文件 @
eafd4280
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/average_accumulates_kernel.h"
#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
namespace
phi
{
// GPU specialization: copies one int64_t from each counter tensor into the
// caller-provided host locations.
//
// All three copies go through paddle::memory::Copy on dev_ctx's stream and
// use the place of in_old_num_accumulates as the source place.
// NOTE(review): callers read the destination values right after this returns;
// confirm the stream-ordered copy has completed by then for the host buffers
// that are passed in.
template <>
void GetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
                                      const DenseTensor& in_num_accumulates,
                                      const DenseTensor& in_old_num_accumulates,
                                      const DenseTensor& in_num_updates,
                                      int64_t* num_updates,
                                      int64_t* num_accumulates,
                                      int64_t* old_num_accumulates) {
  auto copy_stream = dev_ctx.stream();
  auto src_place = in_old_num_accumulates.place();
  const auto counter_bytes = sizeof(int64_t);

  paddle::memory::Copy(phi::CPUPlace(),
                       old_num_accumulates,
                       src_place,
                       in_old_num_accumulates.data<int64_t>(),
                       counter_bytes,
                       copy_stream);
  paddle::memory::Copy(phi::CPUPlace(),
                       num_accumulates,
                       src_place,
                       in_num_accumulates.data<int64_t>(),
                       counter_bytes,
                       copy_stream);
  paddle::memory::Copy(phi::CPUPlace(),
                       num_updates,
                       src_place,
                       in_num_updates.data<int64_t>(),
                       counter_bytes,
                       copy_stream);
}
// GPU specialization: allocates the three counter outputs on the device and
// copies the host-side counter values into them on dev_ctx's stream.
//
// Every copy targets dev_ctx.GetPlace(), i.e. the place the buffers were just
// allocated on. Previously the num_updates copy used the place of
// out_old_num_accumulates instead, which is a different tensor from the one
// being written.
// NOTE(review): the copy sources are this function's by-value parameters;
// confirm paddle::memory::Copy has consumed the host data before returning.
template <>
void SetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
                                      int64_t num_updates,
                                      int64_t num_accumulates,
                                      int64_t old_num_accumulates,
                                      DenseTensor* out_num_accumulates,
                                      DenseTensor* out_old_num_accumulates,
                                      DenseTensor* out_num_updates) {
  int64_t* out_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_accumulates);
  int64_t* out_old_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_old_num_accumulates);
  int64_t* out_num_updates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_updates);

  auto stream = dev_ctx.stream();
  const auto& device_place = dev_ctx.GetPlace();

  paddle::memory::Copy(device_place,
                       out_num_accumulates_ptr,
                       phi::CPUPlace(),
                       &num_accumulates,
                       sizeof(int64_t),
                       stream);
  paddle::memory::Copy(device_place,
                       out_old_num_accumulates_ptr,
                       phi::CPUPlace(),
                       &old_num_accumulates,
                       sizeof(int64_t),
                       stream);
  paddle::memory::Copy(device_place,
                       out_num_updates_ptr,
                       phi::CPUPlace(),
                       &num_updates,
                       sizeof(int64_t),
                       stream);
}
}
// namespace phi
// Registers phi::AverageAccumulatesKernel as the "average_accumulates" kernel
// for the GPU backend, all layouts, with float and double element types.
// The trailing empty braces are the (empty) body required by the macro.
PD_REGISTER_KERNEL
(
average_accumulates
,
GPU
,
ALL_LAYOUT
,
phi
::
AverageAccumulatesKernel
,
float
,
double
)
{}
paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
0 → 100644
浏览文件 @
eafd4280
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/kernels/average_accumulates_kernel.h"
#include <algorithm>
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace
phi
{
// Backend-independent implementation of the average_accumulates kernel.
//
// Steps performed on every call:
//   1. Load the counters through GetAccumulators<Context> and increment
//      num_updates and num_accumulates.
//   2. Write out_sum_1 = in_sum_1 + param, and forward in_sum_2 / in_sum_3 to
//      out_sum_2 / out_sum_3.
//   3. Every kMaxNumAccumulates updates, write in_sum_2 + in_sum_1 to
//      out_sum_2 and zero out_sum_1.
//   4. When num_accumulates has reached both min_average_window and
//      min(max_average_window, num_updates * average_window), write
//      in_sum_1 + in_sum_2 to out_sum_3, zero out_sum_1 and out_sum_2, save
//      num_accumulates into old_num_accumulates and reset num_accumulates.
//   5. Store the counters through SetAccumulators<Context>.
//
// Raises InvalidArgument if min_average_window > max_average_window.
template <typename T, typename Context>
void AverageAccumulatesKernel(const Context& dev_ctx,
                              const DenseTensor& param,
                              const DenseTensor& in_sum_1,
                              const DenseTensor& in_sum_2,
                              const DenseTensor& in_sum_3,
                              const DenseTensor& in_num_accumulates,
                              const DenseTensor& in_old_num_accumulates,
                              const DenseTensor& in_num_updates,
                              float average_window,
                              int64_t max_average_window,
                              int64_t min_average_window,
                              DenseTensor* out_sum_1,
                              DenseTensor* out_sum_2,
                              DenseTensor* out_sum_3,
                              DenseTensor* out_num_accumulates,
                              DenseTensor* out_old_num_accumulates,
                              DenseTensor* out_num_updates) {
  // It is used to avoid loss of precision
  static const int64_t kMaxNumAccumulates = 16384;

  // Host-side copies of the counters. These are three scalars, so plain
  // locals are used rather than one heap allocation per counter per call.
  // NOTE(review): the GPU GetAccumulators issues its copies on dev_ctx's
  // stream; as before, the values are read right after it returns.
  int64_t num_updates = 0;
  int64_t num_accumulates = 0;
  int64_t old_num_accumulates = 0;
  GetAccumulators<Context>(dev_ctx,
                           in_num_accumulates,
                           in_old_num_accumulates,
                           in_num_updates,
                           &num_updates,
                           &num_accumulates,
                           &old_num_accumulates);

  PADDLE_ENFORCE_LE(
      min_average_window,
      max_average_window,
      errors::InvalidArgument(
          "The min_average_window > "
          "max_average_window is not right, min_average_window is %ld, "
          "max_average_window is %ld.",
          min_average_window,
          max_average_window));

  // Flattened Eigen views of the inputs.
  auto param_tensor = EigenVector<T>::Flatten(param);
  auto in_sum_1_tensor = EigenVector<T>::Flatten(in_sum_1);
  auto in_sum_2_tensor = EigenVector<T>::Flatten(in_sum_2);
  auto in_sum_3_tensor = EigenVector<T>::Flatten(in_sum_3);

  // Allocate the outputs, then take flattened Eigen views of them.
  dev_ctx.template Alloc<T>(out_sum_1);
  dev_ctx.template Alloc<T>(out_sum_2);
  dev_ctx.template Alloc<T>(out_sum_3);
  auto out_sum_1_tensor = EigenVector<T>::Flatten(*out_sum_1);
  auto out_sum_2_tensor = EigenVector<T>::Flatten(*out_sum_2);
  auto out_sum_3_tensor = EigenVector<T>::Flatten(*out_sum_3);

  auto& place = *dev_ctx.eigen_device();
  funcs::SetConstant<Context, T> constant_functor;

  ++num_updates;
  ++num_accumulates;

  // The order of these assignments is kept as-is: outputs may alias inputs.
  out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor;
  out_sum_2_tensor.device(place) = in_sum_2_tensor;
  out_sum_3_tensor.device(place) = in_sum_3_tensor;

  if (num_updates % kMaxNumAccumulates == 0) {
    // Move the sum to a different buffer to avoid loss of precision due to
    // too many sums.
    out_sum_2_tensor.device(place) = in_sum_2_tensor + in_sum_1_tensor;
    constant_functor(dev_ctx, out_sum_1, static_cast<T>(0));
  }

  if (num_accumulates >= min_average_window &&
      num_accumulates >=
          std::min<int64_t>(max_average_window,
                            num_updates * average_window)) {
    // Now the average window is too long, discard the old sum.
    out_sum_3_tensor.device(place) = in_sum_1_tensor + in_sum_2_tensor;
    constant_functor(dev_ctx, out_sum_1, static_cast<T>(0));
    constant_functor(dev_ctx, out_sum_2, static_cast<T>(0));
    old_num_accumulates = num_accumulates;
    num_accumulates = 0;
  }

  // Set accumulators to output
  SetAccumulators<Context>(dev_ctx,
                           num_updates,
                           num_accumulates,
                           old_num_accumulates,
                           out_num_accumulates,
                           out_old_num_accumulates,
                           out_num_updates);
}
}
// namespace phi
paddle/phi/ops/compat/average_accumulates_sig.cc
0 → 100644
浏览文件 @
eafd4280
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace
phi
{
// Maps the fluid "average_accumulates" op onto the phi kernel of the same
// name. The three brace lists are, in order: input names, attribute names and
// output names. The mapping is fixed and does not inspect `ctx`.
KernelSignature AverageAccumulatesOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "average_accumulates",
      // inputs
      {"param",
       "in_sum_1",
       "in_sum_2",
       "in_sum_3",
       "in_num_accumulates",
       "in_old_num_accumulates",
       "in_num_updates"},
      // attributes
      {"average_window", "max_average_window", "min_average_window"},
      // outputs
      {"out_sum_1",
       "out_sum_2",
       "out_sum_3",
       "out_num_accumulates",
       "out_old_num_accumulates",
       "out_num_updates"});
}
}
// namespace phi
// Registers phi::AverageAccumulatesOpArgumentMapping as the argument-mapping
// function for the average_accumulates op.
PD_REGISTER_ARG_MAPPING_FN
(
average_accumulates
,
phi
::
AverageAccumulatesOpArgumentMapping
);
python/paddle/incubate/optimizer/modelaverage.py
浏览文件 @
eafd4280
...
...
@@ -21,6 +21,7 @@ import numpy as np
from
paddle.fluid.dygraph
import
base
as
imperative_base
from
paddle.fluid.wrapped_decorator
import
signature_safe_contextmanager
from
paddle
import
_C_ops
from
paddle.fluid.framework
import
in_dygraph_mode
__all__
=
[]
...
...
@@ -231,7 +232,14 @@ class ModelAverage(Optimizer):
old_num_accumulates
=
self
.
_get_accumulator
(
'old_num_accumulates'
,
param_and_grad
[
0
])
num_updates
=
self
.
_get_accumulator
(
'num_updates'
,
param_and_grad
[
0
])
if
framework
.
_non_static_mode
():
if
in_dygraph_mode
():
_
,
_
,
_
,
_
,
_
,
_
=
_C_ops
.
final_state_average_accumulates_
(
param_and_grad
[
0
],
sum_1
,
sum_2
,
sum_3
,
num_accumulates
,
old_num_accumulates
,
num_updates
,
self
.
average_window
,
self
.
max_average_window
,
self
.
min_average_window
)
return
None
elif
framework
.
_non_static_mode
():
_
,
_
,
_
,
_
,
_
,
_
=
_C_ops
.
average_accumulates
(
param_and_grad
[
0
],
sum_1
,
sum_2
,
sum_3
,
num_accumulates
,
old_num_accumulates
,
num_updates
,
sum_1
,
sum_2
,
sum_3
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录