Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
b143e008
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b143e008
编写于
10月 27, 2022
作者:
Z
zhangkaihuo
提交者:
GitHub
10月 27, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[cherry-pick] add batch_norm_kernel (#47394)
* cherry-pick #46359 and resolve conflict
上级
99cec1a6
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
657 addition
and
110 deletion
+657
-110
paddle/fluid/operators/sparse_manual_op.cc
paddle/fluid/operators/sparse_manual_op.cc
+47
-0
paddle/phi/api/yaml/sparse_backward.yaml
paddle/phi/api/yaml/sparse_backward.yaml
+12
-0
paddle/phi/api/yaml/sparse_ops.yaml
paddle/phi/api/yaml/sparse_ops.yaml
+10
-0
paddle/phi/infermeta/multiary.cc
paddle/phi/infermeta/multiary.cc
+1
-0
paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc
paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc
+108
-0
paddle/phi/kernels/sparse/batch_norm_grad_kernel.h
paddle/phi/kernels/sparse/batch_norm_grad_kernel.h
+48
-0
paddle/phi/kernels/sparse/batch_norm_kernel.cc
paddle/phi/kernels/sparse/batch_norm_kernel.cc
+117
-0
paddle/phi/kernels/sparse/batch_norm_kernel.h
paddle/phi/kernels/sparse/batch_norm_kernel.h
+47
-0
paddle/phi/ops/compat/sparse_manual_op_sig.cc
paddle/phi/ops/compat/sparse_manual_op_sig.cc
+27
-0
python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
+86
-23
python/paddle/sparse/nn/layer/norm.py
python/paddle/sparse/nn/layer/norm.py
+154
-87
未找到文件。
paddle/fluid/operators/sparse_manual_op.cc
浏览文件 @
b143e008
...
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/binary.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/infermeta/sparse/binary.h"
#include "paddle/phi/infermeta/sparse/unary.h"
#include "paddle/phi/infermeta/unary.h"
...
...
@@ -185,6 +186,47 @@ DECLARE_INFER_SHAPE_FUNCTOR(sparse_add,
SparseAddInferShapeFunctor
,
PD_INFER_META
(
phi
::
UnchangedInferMeta
));
// Proto maker for the `sparse_batch_norm` operator.
//
// Make() declares, in order:
//   inputs     : x, scale, bias, mean, variance
//   outputs    : y, mean_out, variance_out, saved_mean, saved_variance,
//                reserve_space
//   attributes : momentum, epsilon, data_layout, is_test, use_global_stats,
//                trainable_statistics, fuse_with_relu
//
// The description strings number each attribute by its position; the last
// three used to all say "attribute 4" and the op comment named a different
// op (sparse_conv3d), both of which were copy-paste slips.
class SparseBatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("x", "(Tensor), input 0 of sparse_batch_norm op.");
    AddInput("scale", "(Tensor), input 1 of sparse_batch_norm op.");
    AddInput("bias", "(Tensor), input 2 of sparse_batch_norm op.");
    AddInput("mean", "(Tensor), input 3 of sparse_batch_norm op.");
    AddInput("variance", "(Tensor), input 4 of sparse_batch_norm op.");
    AddOutput("y", "(Tensor), output 0 of sparse_batch_norm op.");
    AddOutput("mean_out", "(Tensor), output 1 of sparse_batch_norm op.");
    AddOutput("variance_out", "(Tensor), output 2 of sparse_batch_norm op.");
    AddOutput("saved_mean", "(Tensor), output 3 of sparse_batch_norm op.");
    AddOutput("saved_variance", "(Tensor), output 4 of sparse_batch_norm op.");
    AddOutput("reserve_space", "(Tensor), output 5 of sparse_batch_norm op.");
    AddAttr<float>("momentum",
                   "(float), attribute 0 for sparse_batch_norm op.");
    AddAttr<float>("epsilon", "(float), attribute 1 for sparse_batch_norm op.");
    AddAttr<std::string>("data_layout",
                         "(string), attribute 2 for sparse_batch_norm op.");
    AddAttr<bool>("is_test", "(bool), attribute 3 for sparse_batch_norm op.");
    AddAttr<bool>("use_global_stats",
                  "(bool), attribute 4 for sparse_batch_norm op.");
    AddAttr<bool>("trainable_statistics",
                  "(bool), attribute 5 for sparse_batch_norm op.");
    AddAttr<bool>("fuse_with_relu",
                  "(bool), attribute 6 for sparse_batch_norm op.");
    AddComment(R"DOC(
TODO: Documentation of sparse_batch_norm op.
)DOC");
  }
};
// Operator type for `sparse_batch_norm`. It declares no members of its own and
// only inherits the constructors of framework::OperatorWithKernel.
class SparseBatchNormOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
};
// Declares SparseBatchNormInferShapeFunctor for the sparse_batch_norm op,
// built from phi::BatchNormInferMeta via PD_INFER_META.
DECLARE_INFER_SHAPE_FUNCTOR(sparse_batch_norm,
                            SparseBatchNormInferShapeFunctor,
                            PD_INFER_META(phi::BatchNormInferMeta));
}
// namespace operators
}
// namespace paddle
...
...
@@ -224,3 +266,8 @@ REGISTER_OPERATOR(sparse_add,
ops
::
SparseAddOp
,
ops
::
SparseAddOpMaker
,
ops
::
SparseAddInferShapeFunctor
);
// Registers the sparse_batch_norm operator together with its proto maker and
// its infer-shape functor.
REGISTER_OPERATOR(sparse_batch_norm,
                  ops::SparseBatchNormOp,
                  ops::SparseBatchNormOpMaker,
                  ops::SparseBatchNormInferShapeFunctor);
paddle/phi/api/yaml/sparse_backward.yaml
浏览文件 @
b143e008
...
...
@@ -100,6 +100,18 @@
func
:
atanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
-
backward_op
:
batch_norm_grad
forward
:
batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
args
:
(Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
output
:
Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta
:
func
:
GeneralTernaryGradInferMeta
param
:
[
x
,
scale
,
bias
]
kernel
:
func
:
batch_norm_coo_grad {sparse_coo, dense, dense, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense}
data_type
:
out_grad
optional
:
mean_out, variance_out, reserve_space
-
backward_op
:
cast_grad
forward
:
cast(Tensor x, DataType index_dtype, DataType value_dtype) -> Tensor(out)
args
:
(Tensor x, Tensor out_grad, DataType value_dtype)
...
...
paddle/phi/api/yaml/sparse_ops.yaml
浏览文件 @
b143e008
...
...
@@ -87,6 +87,16 @@
layout
:
x
backward
:
atanh_grad
-
op
:
batch_norm
args
:
(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
output
:
Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta
:
func
:
BatchNormInferMeta
kernel
:
func
:
batch_norm_coo {sparse_coo, dense, dense, dense, dense -> sparse_coo, dense, dense, dense, dense, dense}
data_type
:
x
backward
:
batch_norm_grad
-
op
:
cast
args
:
(Tensor x, DataType index_dtype=DataType::UNDEFINED, DataType value_dtype=DataType::UNDEFINED)
output
:
Tensor(out)
...
...
paddle/phi/infermeta/multiary.cc
浏览文件 @
b143e008
...
...
@@ -609,6 +609,7 @@ void BatchNormInferMeta(const MetaTensor& x,
saved_variance
->
set_dims
({
C
});
}
y
->
share_lod
(
x
);
y
->
set_dtype
(
x
.
dtype
());
}
void
BatchNormInferInferMeta
(
const
MetaTensor
&
x
,
...
...
paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc
0 → 100644
浏览文件 @
b143e008
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/sparse/batch_norm_grad_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_grad_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
phi
{
namespace
sparse
{
// Batch-norm backward for a COO sparse input.
//
// The three outputs are first (re)created: x_grad through
// EmptyLikeCooKernel(dev_ctx, x, x_grad), scale_grad and bias_grad through
// phi::EmptyLike on scale and bias. The dense phi::BatchNormGradKernel is then
// run on the values tensors of x and y_grad, writing into the values tensor of
// x_grad; every other argument is forwarded unchanged.
template <typename T, typename Context>
void BatchNormCooGradKernel(const Context& dev_ctx,
                            const SparseCooTensor& x,
                            const DenseTensor& scale,
                            const DenseTensor& bias,
                            const paddle::optional<DenseTensor>& mean,
                            const paddle::optional<DenseTensor>& variance,
                            const DenseTensor& saved_mean,
                            const DenseTensor& saved_variance,
                            const paddle::optional<DenseTensor>& reserve_space,
                            const SparseCooTensor& y_grad,
                            float momentum,
                            float epsilon,
                            const std::string& data_layout,
                            bool is_test,
                            bool use_global_stats,
                            bool trainable_statistics,
                            bool fuse_with_relu,
                            SparseCooTensor* x_grad,
                            DenseTensor* scale_grad,
                            DenseTensor* bias_grad) {
  // Output preparation must precede taking x_grad's values pointer below.
  EmptyLikeCooKernel<T, Context>(dev_ctx, x, x_grad);
  *scale_grad = phi::EmptyLike<T, Context>(dev_ctx, scale);
  *bias_grad = phi::EmptyLike<T, Context>(dev_ctx, bias);

  const auto& x_values = x.values();
  const auto& dy_values = y_grad.values();
  auto dx_values = x_grad->mutable_values();

  phi::BatchNormGradKernel<T, Context>(dev_ctx,
                                       x_values,
                                       scale,
                                       bias,
                                       mean,
                                       variance,
                                       saved_mean,
                                       saved_variance,
                                       reserve_space,
                                       dy_values,
                                       momentum,
                                       epsilon,
                                       data_layout,
                                       is_test,
                                       use_global_stats,
                                       trainable_statistics,
                                       fuse_with_relu,
                                       dx_values,
                                       scale_grad,
                                       bias_grad);
}
}
// namespace sparse
}
// namespace phi
// CPU registration of batch_norm_coo_grad for float and double.
// Input 0 is marked as having the SPARSE_COO layout.
PD_REGISTER_KERNEL(batch_norm_coo_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooGradKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#if defined(PADDLE_WITH_HIP)
// GPU registration of batch_norm_coo_grad used when building with HIP:
// float and float16 only. Input 0 is marked as having the SPARSE_COO layout.
PD_REGISTER_KERNEL(batch_norm_coo_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooGradKernel,
                   float,
                   phi::dtype::float16) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#endif
#if defined(PADDLE_WITH_CUDA)
// GPU registration of batch_norm_coo_grad used when building with CUDA:
// float, double and float16. Input 0 is marked as having the SPARSE_COO
// layout. For the float16 kernel key, outputs 0-2 are declared as FLOAT32.
PD_REGISTER_KERNEL(batch_norm_coo_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  if (kernel_key.dtype() == phi::DataType::FLOAT16) {
    const auto fp32 = phi::DataType::FLOAT32;
    kernel->OutputAt(0).SetDataType(fp32);  // x_grad
    kernel->OutputAt(1).SetDataType(fp32);  // scale_grad
    kernel->OutputAt(2).SetDataType(fp32);  // bias_grad
  }
}
#endif
paddle/phi/kernels/sparse/batch_norm_grad_kernel.h
0 → 100644
浏览文件 @
b143e008
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
namespace
phi
{
namespace
sparse
{
// Declaration of the batch-norm backward kernel for a COO sparse input.
//
// Inputs : dev_ctx, x and y_grad (SparseCooTensor), scale, bias, saved_mean,
//          saved_variance (DenseTensor), and the optional dense tensors mean,
//          variance and reserve_space.
// Attrs  : momentum, epsilon, data_layout, is_test, use_global_stats,
//          trainable_statistics, fuse_with_relu.
// Outputs: x_grad (SparseCooTensor), scale_grad and bias_grad (DenseTensor).
template <typename T, typename Context>
void BatchNormCooGradKernel(const Context& dev_ctx,
                            const SparseCooTensor& x,
                            const DenseTensor& scale,
                            const DenseTensor& bias,
                            const paddle::optional<DenseTensor>& mean,
                            const paddle::optional<DenseTensor>& variance,
                            const DenseTensor& saved_mean,
                            const DenseTensor& saved_variance,
                            const paddle::optional<DenseTensor>& reserve_space,
                            const SparseCooTensor& y_grad,
                            float momentum,
                            float epsilon,
                            const std::string& data_layout,
                            bool is_test,
                            bool use_global_stats,
                            bool trainable_statistics,
                            bool fuse_with_relu,
                            SparseCooTensor* x_grad,
                            DenseTensor* scale_grad,
                            DenseTensor* bias_grad);
}
// namespace sparse
}
// namespace phi
paddle/phi/kernels/sparse/batch_norm_kernel.cc
0 → 100644
浏览文件 @
b143e008
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/sparse/batch_norm_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
phi
{
namespace
sparse
{
// Batch-norm forward for a COO sparse input.
//
// y is first (re)created through EmptyLikeCooKernel(dev_ctx, x, y). The dense
// phi::BatchNormKernel is then run on the values tensor of x, writing into the
// values tensor of y; all statistics tensors and attributes are forwarded
// unchanged. Finally y receives x's indices dict.
template <typename T, typename Context>
void BatchNormCooKernel(const Context& dev_ctx,
                        const SparseCooTensor& x,
                        const DenseTensor& scale,
                        const DenseTensor& bias,
                        const DenseTensor& mean,
                        const DenseTensor& variance,
                        float momentum,
                        float epsilon,
                        const std::string& data_layout,
                        bool is_test,
                        bool use_global_stats,
                        bool trainable_statistics,
                        bool fuse_with_relu,
                        SparseCooTensor* y,
                        DenseTensor* mean_out,
                        DenseTensor* variance_out,
                        DenseTensor* saved_mean,
                        DenseTensor* saved_variance,
                        DenseTensor* reserve_space) {
  // y must be prepared before its values pointer is taken below.
  EmptyLikeCooKernel<T, Context>(dev_ctx, x, y);

  const auto& x_values = x.values();
  auto y_values = y->mutable_values();

  phi::BatchNormKernel<T, Context>(dev_ctx,
                                   x_values,
                                   scale,
                                   bias,
                                   mean,
                                   variance,
                                   momentum,
                                   epsilon,
                                   data_layout,
                                   is_test,
                                   use_global_stats,
                                   trainable_statistics,
                                   fuse_with_relu,
                                   y_values,
                                   mean_out,
                                   variance_out,
                                   saved_mean,
                                   saved_variance,
                                   reserve_space);

  y->SetIndicesDict(x.GetIndicesDict());
}
}
// namespace sparse
}
// namespace phi
// CPU registration of batch_norm_coo for float and double.
// Input 0 is marked as having the SPARSE_COO layout.
PD_REGISTER_KERNEL(batch_norm_coo,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
#if defined(PADDLE_WITH_HIP)
// GPU registration of batch_norm_coo used when building with HIP: float and
// float16 only. Input 0 is marked as SPARSE_COO; inputs 1-4 and outputs 1-4
// are unconditionally declared as FLOAT32.
// NOTE(review): by position in the kernel signature these look like
// scale/bias/mean/variance and mean_out/variance_out/saved_mean/
// saved_variance - confirm against the registration macro's indexing.
PD_REGISTER_KERNEL(batch_norm_coo,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooKernel,
                   float,
                   phi::dtype::float16) {
  const auto fp32 = phi::DataType::FLOAT32;
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  kernel->InputAt(1).SetDataType(fp32);
  kernel->InputAt(2).SetDataType(fp32);
  kernel->InputAt(3).SetDataType(fp32);
  kernel->InputAt(4).SetDataType(fp32);
  kernel->OutputAt(1).SetDataType(fp32);
  kernel->OutputAt(2).SetDataType(fp32);
  kernel->OutputAt(3).SetDataType(fp32);
  kernel->OutputAt(4).SetDataType(fp32);
}
#endif
#if defined(PADDLE_WITH_CUDA)
// GPU registration of batch_norm_coo used when building with CUDA: float,
// double and float16. Input 0 is marked as SPARSE_COO. Only for the float16
// kernel key are inputs 1-4 and outputs 1-4 declared as FLOAT32.
// NOTE(review): by position in the kernel signature these look like
// scale/bias/mean/variance and mean_out/variance_out/saved_mean/
// saved_variance - confirm against the registration macro's indexing.
PD_REGISTER_KERNEL(batch_norm_coo,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::BatchNormCooKernel,
                   float,
                   double,
                   phi::dtype::float16) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  if (kernel_key.dtype() == phi::DataType::FLOAT16) {
    const auto fp32 = phi::DataType::FLOAT32;
    kernel->InputAt(1).SetDataType(fp32);
    kernel->InputAt(2).SetDataType(fp32);
    kernel->InputAt(3).SetDataType(fp32);
    kernel->InputAt(4).SetDataType(fp32);
    kernel->OutputAt(1).SetDataType(fp32);
    kernel->OutputAt(2).SetDataType(fp32);
    kernel->OutputAt(3).SetDataType(fp32);
    kernel->OutputAt(4).SetDataType(fp32);
  }
}
#endif
paddle/phi/kernels/sparse/batch_norm_kernel.h
0 → 100644
浏览文件 @
b143e008
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
namespace
phi
{
namespace
sparse
{
// Declaration of the batch-norm forward kernel for a COO sparse input.
//
// The name is BatchNormCooKernel so that it matches the template that
// batch_norm_kernel.cc defines and registers as `batch_norm_coo`; the previous
// spelling (BatchNormKernel) declared a phi::sparse function that was never
// defined and left the real kernel undeclared for users of this header.
//
// Inputs : dev_ctx, x (SparseCooTensor), scale, bias, mean, variance
//          (DenseTensor).
// Attrs  : momentum, epsilon, data_layout, is_test, use_global_stats,
//          trainable_statistics, fuse_with_relu.
// Outputs: y (SparseCooTensor), mean_out, variance_out, saved_mean,
//          saved_variance, reserve_space (DenseTensor).
template <typename T, typename Context>
void BatchNormCooKernel(const Context& dev_ctx,
                        const SparseCooTensor& x,
                        const DenseTensor& scale,
                        const DenseTensor& bias,
                        const DenseTensor& mean,
                        const DenseTensor& variance,
                        float momentum,
                        float epsilon,
                        const std::string& data_layout,
                        bool is_test,
                        bool use_global_stats,
                        bool trainable_statistics,
                        bool fuse_with_relu,
                        SparseCooTensor* y,
                        DenseTensor* mean_out,
                        DenseTensor* variance_out,
                        DenseTensor* saved_mean,
                        DenseTensor* saved_variance,
                        DenseTensor* reserve_space);
}
// namespace sparse
}
// namespace phi
paddle/phi/ops/compat/sparse_manual_op_sig.cc
浏览文件 @
b143e008
...
...
@@ -82,6 +82,29 @@ KernelSignature SparseAddOpArgumentMapping(const ArgumentMappingContext& ctx) {
}
}
// Maps the sparse_batch_norm op onto a kernel signature.
//
// When input "x" is a SparseCooTensor the result names the `batch_norm_coo`
// kernel with its input, attribute and output argument lists; for any other
// input kind an "unregistered" signature with empty lists is returned.
KernelSignature SparseBatchNormOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  if (!ctx.IsSparseCooTensorInput("x")) {
    return KernelSignature("unregistered", {}, {}, {});
  }
  return KernelSignature("batch_norm_coo",
                         {"x", "scale", "bias", "mean", "variance"},
                         {"momentum",
                          "epsilon",
                          "data_layout",
                          "is_test",
                          "use_global_stats",
                          "trainable_statistics",
                          "fuse_with_relu"},
                         {"y",
                          "mean_out",
                          "variance_out",
                          "saved_mean",
                          "saved_variance",
                          "reserve_space"});
}
}
// namespace phi
PD_REGISTER_BASE_KERNEL_NAME
(
sparse_sparse_coo_tensor
,
sparse_coo_tensor
);
...
...
@@ -106,3 +129,7 @@ PD_REGISTER_ARG_MAPPING_FN(sparse_conv3d, phi::SparseConv3dOpArgumentMapping);
PD_REGISTER_BASE_KERNEL_NAME
(
sparse_add
,
add_coo_coo
);
PD_REGISTER_ARG_MAPPING_FN
(
sparse_add
,
phi
::
SparseAddOpArgumentMapping
);
// Associates the sparse_batch_norm op with the base kernel name
// batch_norm_coo and registers its argument-mapping function.
PD_REGISTER_BASE_KERNEL_NAME(sparse_batch_norm, batch_norm_coo);
PD_REGISTER_ARG_MAPPING_FN(sparse_batch_norm,
                           phi::SparseBatchNormOpArgumentMapping);
python/paddle/fluid/tests/unittests/test_sparse_norm_op.py
浏览文件 @
b143e008
...
...
@@ -17,18 +17,18 @@ import unittest
import
numpy
as
np
import
paddle
from
paddle.sparse
import
nn
import
paddle.sparse
as
sparse
import
paddle.fluid
as
fluid
import
copy
class
TestSparseBatchNorm
(
unittest
.
TestCase
):
def
test
(
self
):
fluid
.
set_flags
({
"FLAGS_retain_grad_for_all_tensor"
:
True
})
paddle
.
seed
(
0
)
channels
=
4
shape
=
[
2
,
3
,
6
,
6
,
channels
]
#there is no zero in dense_x
#
there is no zero in dense_x
dense_x
=
paddle
.
randn
(
shape
)
dense_x
.
stop_gradient
=
False
...
...
@@ -48,17 +48,21 @@ class TestSparseBatchNorm(unittest.TestCase):
sparse_y
=
sparse_batch_norm
(
sparse_x
)
# compare the result with dense batch_norm
assert
np
.
allclose
(
dense_y
.
flatten
().
numpy
(),
sparse_y
.
values
().
flatten
().
numpy
(),
atol
=
1e-5
,
rtol
=
1e-5
)
assert
np
.
allclose
(
dense_y
.
flatten
().
numpy
(),
sparse_y
.
values
().
flatten
().
numpy
(),
atol
=
1e-5
,
rtol
=
1e-5
,
)
# test backward
sparse_y
.
backward
(
sparse_y
)
assert
np
.
allclose
(
dense_x
.
grad
.
flatten
().
numpy
(),
sparse_x
.
grad
.
values
().
flatten
().
numpy
(),
atol
=
1e-5
,
rtol
=
1e-5
)
assert
np
.
allclose
(
dense_x
.
grad
.
flatten
().
numpy
(),
sparse_x
.
grad
.
values
().
flatten
().
numpy
(),
atol
=
1e-5
,
rtol
=
1e-5
,
)
fluid
.
set_flags
({
"FLAGS_retain_grad_for_all_tensor"
:
False
})
def
test_error_layout
(
self
):
...
...
@@ -66,8 +70,9 @@ class TestSparseBatchNorm(unittest.TestCase):
shape
=
[
2
,
3
,
6
,
6
,
3
]
x
=
paddle
.
randn
(
shape
)
sparse_x
=
x
.
to_sparse_coo
(
4
)
sparse_batch_norm
=
paddle
.
sparse
.
nn
.
BatchNorm
(
3
,
data_format
=
'NCDHW'
)
sparse_batch_norm
=
paddle
.
sparse
.
nn
.
BatchNorm
(
3
,
data_format
=
'NCDHW'
)
sparse_batch_norm
(
sparse_x
)
def
test2
(
self
):
...
...
@@ -86,10 +91,10 @@ class TestSparseBatchNorm(unittest.TestCase):
class
TestSyncBatchNorm
(
unittest
.
TestCase
):
def
test_sync_batch_norm
(
self
):
x
=
np
.
array
([[[[
0.3
,
0.4
],
[
0.3
,
0.07
]],
[[
0.83
,
0.37
],
[
0.18
,
0.93
]]]]).
astype
(
'float32'
)
x
=
np
.
array
(
[[[[
0.3
,
0.4
],
[
0.3
,
0.07
]],
[[
0.83
,
0.37
],
[
0.18
,
0.93
]]]]
).
astype
(
'float32'
)
x
=
paddle
.
to_tensor
(
x
)
sparse_x
=
x
.
to_sparse_coo
(
len
(
x
.
shape
)
-
1
)
...
...
@@ -100,23 +105,81 @@ class TestSyncBatchNorm(unittest.TestCase):
dense_sync_bn
=
paddle
.
nn
.
SyncBatchNorm
(
2
)
x
=
x
.
reshape
((
-
1
,
x
.
shape
[
-
1
]))
dense_hidden
=
dense_sync_bn
(
x
)
assert
np
.
allclose
(
sparse_hidden
.
values
().
numpy
(),
dense_hidden
.
numpy
())
assert
np
.
allclose
(
sparse_hidden
.
values
().
numpy
(),
dense_hidden
.
numpy
()
)
def
test_convert
(
self
):
base_model
=
paddle
.
nn
.
Sequential
(
nn
.
Conv3D
(
3
,
5
,
3
),
nn
.
BatchNorm
(
5
),
nn
.
BatchNorm
(
5
))
base_model
=
paddle
.
nn
.
Sequential
(
nn
.
Conv3D
(
3
,
5
,
3
),
nn
.
BatchNorm
(
5
),
nn
.
BatchNorm
(
5
)
)
model
=
paddle
.
nn
.
Sequential
(
nn
.
Conv3D
(
3
,
5
,
3
),
nn
.
BatchNorm
(
5
),
nn
.
BatchNorm
(
5
,
weight_attr
=
fluid
.
ParamAttr
(
name
=
'bn.scale'
),
bias_attr
=
fluid
.
ParamAttr
(
name
=
'bn.bias'
)))
nn
.
Conv3D
(
3
,
5
,
3
),
nn
.
BatchNorm
(
5
),
nn
.
BatchNorm
(
5
,
weight_attr
=
fluid
.
ParamAttr
(
name
=
'bn.scale'
),
bias_attr
=
fluid
.
ParamAttr
(
name
=
'bn.bias'
),
),
)
model
=
nn
.
SyncBatchNorm
.
convert_sync_batchnorm
(
model
)
for
idx
,
sublayer
in
enumerate
(
base_model
.
sublayers
()):
if
isinstance
(
sublayer
,
nn
.
BatchNorm
):
self
.
assertEqual
(
isinstance
(
model
[
idx
],
nn
.
SyncBatchNorm
),
True
)
class
TestStatic
(
unittest
.
TestCase
):
def
test
(
self
):
paddle
.
enable_static
()
indices
=
paddle
.
static
.
data
(
name
=
'indices'
,
shape
=
[
4
,
4
],
dtype
=
'int32'
)
values
=
paddle
.
static
.
data
(
name
=
'values'
,
shape
=
[
4
,
1
],
dtype
=
'float32'
)
channels
=
1
dense_shape
=
[
1
,
1
,
3
,
4
,
channels
]
sp_x
=
sparse
.
sparse_coo_tensor
(
indices
,
values
,
dense_shape
)
sparse_batch_norm
=
paddle
.
sparse
.
nn
.
BatchNorm
(
channels
)
sp_y
=
sparse_batch_norm
(
sp_x
)
out
=
sp_y
.
to_dense
()
exe
=
paddle
.
static
.
Executor
()
indices_data
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
1
,
2
],
[
1
,
3
,
2
,
3
]]
values_data
=
np
.
array
([[
1.0
],
[
2.0
],
[
3.0
],
[
4.0
]]).
astype
(
'float32'
)
bias_data
=
np
.
array
([
1.0
]).
astype
(
'float32'
)
weight_data
=
np
.
array
([
2.0
]).
astype
(
'float32'
)
mean_data
=
np
.
array
([
1.0
]).
astype
(
'float32'
)
variance_data
=
np
.
array
([
2.0
]).
astype
(
'float32'
)
fetch
=
exe
.
run
(
feed
=
{
'indices'
:
indices_data
,
'values'
:
values_data
,
'batch_norm_0.b_0'
:
bias_data
,
'batch_norm_0.w_0'
:
weight_data
,
'batch_norm_0.w_1'
:
mean_data
,
'batch_norm_0.w_2'
:
variance_data
,
},
fetch_list
=
[
out
],
return_numpy
=
True
,
)
correct_out
=
np
.
array
(
[
[
[
[[
0.0
],
[
-
1.6832708
],
[
0.0
],
[
0.1055764
]],
[[
0.0
],
[
0.0
],
[
1.8944236
],
[
0.0
]],
[[
0.0
],
[
0.0
],
[
0.0
],
[
3.683271
]],
]
]
]
).
astype
(
'float32'
)
np
.
testing
.
assert_allclose
(
correct_out
,
fetch
[
0
],
rtol
=
1e-5
)
paddle
.
disable_static
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/sparse/nn/layer/norm.py
浏览文件 @
b143e008
...
...
@@ -12,23 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
warnings
from
paddle.nn.layer.norm
import
_BatchNormBase
from
paddle.framework
import
no_grad
from
paddle
import
_C_ops
,
in_dynamic_mode
from
paddle.fluid.layer_helper
import
LayerHelper
class
BatchNorm
(
paddle
.
nn
.
BatchNorm1D
):
...
...
@@ -108,57 +97,112 @@ class BatchNorm(paddle.nn.BatchNorm1D):
# [1, 6, 6, 6, 3]
"""
def
__init__
(
self
,
num_features
,
momentum
=
0.9
,
epsilon
=
1e-05
,
weight_attr
=
None
,
bias_attr
=
None
,
data_format
=
'NDHWC'
,
use_global_stats
=
None
,
name
=
None
):
super
(
BatchNorm
,
self
).
__init__
(
num_features
,
momentum
=
momentum
,
epsilon
=
epsilon
,
weight_attr
=
weight_attr
,
bias_attr
=
bias_attr
,
data_format
=
data_format
,
use_global_stats
=
use_global_stats
,
name
=
name
)
def
__init__
(
self
,
num_features
,
momentum
=
0.9
,
epsilon
=
1e-05
,
weight_attr
=
None
,
bias_attr
=
None
,
data_format
=
'NDHWC'
,
use_global_stats
=
None
,
name
=
None
,
):
super
(
BatchNorm
,
self
).
__init__
(
num_features
,
momentum
=
momentum
,
epsilon
=
epsilon
,
weight_attr
=
weight_attr
,
bias_attr
=
bias_attr
,
data_format
=
data_format
,
use_global_stats
=
use_global_stats
,
name
=
name
,
)
def
_check_data_format
(
self
,
input
):
if
input
!=
"NDHWC"
:
raise
ValueError
(
'sparse BatchNorm only support layout of "NDHWC"'
)
def
forward
(
self
,
input
):
values
=
input
.
values
()
self
.
_check_data_format
(
self
.
_data_format
)
if
len
(
values
.
shape
)
!=
2
:
raise
ValueError
(
'expected 2D input.values() (got {}D)'
.
format
(
len
(
values
.
shape
)))
if
self
.
training
:
warnings
.
warn
(
"When training, we now always track global mean and variance."
)
batch_norm_out
=
paddle
.
nn
.
functional
.
batch_norm
(
values
,
self
.
_mean
,
self
.
_variance
,
weight
=
self
.
weight
,
bias
=
self
.
bias
,
training
=
self
.
training
,
momentum
=
self
.
_momentum
,
epsilon
=
self
.
_epsilon
,
data_format
=
'NC'
,
use_global_stats
=
self
.
_use_global_stats
)
return
paddle
.
sparse
.
sparse_coo_tensor
(
input
.
indices
(),
batch_norm_out
,
shape
=
input
.
shape
,
stop_gradient
=
input
.
stop_gradient
)
"When training, we now always track global mean and variance."
)
if
self
.
_use_global_stats
==
None
:
self
.
_use_global_stats
=
not
self
.
training
trainable_statistics
=
False
else
:
trainable_statistics
=
not
self
.
_use_global_stats
data_format
=
'NCHW'
if
self
.
_data_format
[
1
]
==
'C'
else
'NHWC'
if
in_dynamic_mode
():
batch_norm_out
,
_
,
_
,
_
,
_
,
_
=
_C_ops
.
sparse_batch_norm
(
input
,
self
.
weight
,
self
.
bias
,
self
.
_mean
,
self
.
_variance
,
self
.
_momentum
,
self
.
_epsilon
,
data_format
,
not
self
.
training
,
self
.
_use_global_stats
,
trainable_statistics
,
False
,
)
return
batch_norm_out
else
:
inputs
=
{
'x'
:
input
,
'scale'
:
self
.
weight
,
'bias'
:
self
.
bias
,
'mean'
:
self
.
_mean
,
'variance'
:
self
.
_variance
,
}
attrs
=
{
'momentum'
:
self
.
_momentum
,
'epsilon'
:
self
.
_epsilon
,
'data_layout'
:
data_format
,
'is_test'
:
not
self
.
training
,
'use_global_stats'
:
self
.
_use_global_stats
,
'trainable_statistics'
:
trainable_statistics
,
'fuse_with_relu'
:
False
,
}
op_type
=
'sparse_batch_norm'
helper
=
LayerHelper
(
op_type
)
dtype
=
input
.
dtype
mean_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
saved_mean
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
saved_variance
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
reserve_space
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
y
=
helper
.
create_sparse_variable_for_type_inference
(
dtype
)
outputs
=
{
"y"
:
y
,
"mean_out"
:
mean_out
,
"variance_out"
:
variance_out
,
"saved_mean"
:
saved_mean
,
"saved_variance"
:
saved_variance
,
"reserve_space"
:
reserve_space
,
}
helper
.
append_op
(
type
=
op_type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
attrs
)
return
y
class
SyncBatchNorm
(
paddle
.
nn
.
SyncBatchNorm
):
...
...
@@ -258,26 +302,34 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
# [-0.88415730, 1.57439375]])
"""
def
__init__
(
self
,
num_features
,
momentum
=
0.9
,
epsilon
=
1e-05
,
weight_attr
=
None
,
bias_attr
=
None
,
data_format
=
'NCHW'
,
name
=
None
):
super
(
SyncBatchNorm
,
self
).
__init__
(
num_features
,
momentum
,
epsilon
,
weight_attr
,
bias_attr
,
data_format
,
name
)
def
__init__
(
self
,
num_features
,
momentum
=
0.9
,
epsilon
=
1e-05
,
weight_attr
=
None
,
bias_attr
=
None
,
data_format
=
'NCHW'
,
name
=
None
,
):
super
(
SyncBatchNorm
,
self
).
__init__
(
num_features
,
momentum
,
epsilon
,
weight_attr
,
bias_attr
,
data_format
,
name
,
)
def
forward
(
self
,
x
):
assert
x
.
is_sparse_coo
(
assert
(
x
.
is_sparse_coo
()
),
"SyncBatchNorm only support SparseTensor in COO format."
out
=
super
(
SyncBatchNorm
,
self
).
forward
(
x
.
values
())
return
paddle
.
sparse
.
sparse_coo_tensor
(
x
.
indices
(),
out
,
shape
=
x
.
shape
,
stop_gradient
=
x
.
stop_gradient
)
return
paddle
.
sparse
.
sparse_coo_tensor
(
x
.
indices
(),
out
,
shape
=
x
.
shape
,
stop_gradient
=
x
.
stop_gradient
)
@
classmethod
def
convert_sync_batchnorm
(
cls
,
layer
):
...
...
@@ -303,27 +355,41 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
layer_output
=
layer
if
isinstance
(
layer
,
_BatchNormBase
):
if
layer
.
_weight_attr
!=
None
and
not
isinstance
(
layer
.
_weight_attr
,
bool
)
and
layer
.
_weight_attr
.
name
!=
None
:
if
(
layer
.
_weight_attr
!=
None
and
not
isinstance
(
layer
.
_weight_attr
,
bool
)
and
layer
.
_weight_attr
.
name
!=
None
):
layer
.
_weight_attr
.
name
=
layer
.
_weight_attr
.
name
+
'_sync'
if
layer
.
_bias_attr
!=
None
and
not
isinstance
(
layer
.
_bias_attr
,
bool
)
and
layer
.
_bias_attr
.
name
!=
None
:
if
(
layer
.
_bias_attr
!=
None
and
not
isinstance
(
layer
.
_bias_attr
,
bool
)
and
layer
.
_bias_attr
.
name
!=
None
):
layer
.
_bias_attr
.
name
=
layer
.
_bias_attr
.
name
+
'_sync'
#convert sparse BatchNorm
#
convert sparse BatchNorm
if
isinstance
(
layer
,
BatchNorm
):
layer_output
=
SyncBatchNorm
(
layer
.
_num_features
,
layer
.
_momentum
,
layer
.
_epsilon
,
layer
.
_weight_attr
,
layer
.
_bias_attr
,
layer
.
_data_format
,
layer
.
_name
)
#convert dense BatchNorm
layer_output
=
SyncBatchNorm
(
layer
.
_num_features
,
layer
.
_momentum
,
layer
.
_epsilon
,
layer
.
_weight_attr
,
layer
.
_bias_attr
,
layer
.
_data_format
,
layer
.
_name
,
)
# convert dense BatchNorm
else
:
layer_output
=
paddle
.
nn
.
SyncBatchNorm
(
layer
.
_num_features
,
layer
.
_momentum
,
layer
.
_epsilon
,
layer
.
_weight_attr
,
layer
.
_bias_attr
,
layer
.
_data_format
,
layer
.
_name
)
layer
.
_num_features
,
layer
.
_momentum
,
layer
.
_epsilon
,
layer
.
_weight_attr
,
layer
.
_bias_attr
,
layer
.
_data_format
,
layer
.
_name
,
)
if
layer
.
_weight_attr
!=
False
and
layer
.
_bias_attr
!=
False
:
with
no_grad
():
...
...
@@ -333,7 +399,8 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
layer_output
.
_variance
=
layer
.
_variance
for
name
,
sublayer
in
layer
.
named_children
():
layer_output
.
add_sublayer
(
name
,
cls
.
convert_sync_batchnorm
(
sublayer
))
layer_output
.
add_sublayer
(
name
,
cls
.
convert_sync_batchnorm
(
sublayer
)
)
del
layer
return
layer_output
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录