Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
672d56d7
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
672d56d7
编写于
6月 20, 2019
作者:
L
lijianshe02
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add server batch_norm kernel and unitest
上级
d1f8d02f
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
330 addition
and
0 deletion
+330
-0
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+3
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc
+30
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute.h
paddle/fluid/lite/kernels/x86/batch_norm_compute.h
+158
-0
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc
+139
-0
未找到文件。
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
672d56d7
...
@@ -17,6 +17,7 @@ cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps}
...
@@ -17,6 +17,7 @@ cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps}
cc_library
(
concat_compute_x86 SRCS concat_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
concat_compute_x86 SRCS concat_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
conv_compute_x86 SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
blas im2col vol2col
)
cc_library
(
conv_compute_x86 SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
blas im2col vol2col
)
cc_library
(
pool_compute_x86 SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
pooling
)
cc_library
(
pool_compute_x86 SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
pooling
)
cc_library
(
batch_norm_compute_x86 SRCS batch_norm_compute.cc DEPS
${
lite_kernel_deps
}
)
lite_cc_test
(
test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86
)
lite_cc_test
(
test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86
)
lite_cc_test
(
test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86
)
lite_cc_test
(
test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86
)
...
@@ -28,6 +29,7 @@ lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x
...
@@ -28,6 +29,7 @@ lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x
lite_cc_test
(
test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator
)
lite_cc_test
(
test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator
)
lite_cc_test
(
test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86
)
lite_cc_test
(
test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86
)
lite_cc_test
(
test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86
)
lite_cc_test
(
test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86
)
lite_cc_test
(
test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86
)
set
(
x86_kernels
set
(
x86_kernels
...
@@ -44,6 +46,7 @@ set(x86_kernels
...
@@ -44,6 +46,7 @@ set(x86_kernels
concat_compute_x86
concat_compute_x86
conv_compute_x86
conv_compute_x86
pool_compute_x86
pool_compute_x86
batch_norm_compute_x86
)
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
...
...
paddle/fluid/lite/kernels/x86/batch_norm_compute.cc
0 → 100644
浏览文件 @
672d56d7
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/batch_norm_compute.h"
// Registers BatchNormCompute<float> as the batch_norm kernel for the x86
// target, float precision, NCHW layout, under the alias "def", and declares
// every input and output argument as an x86 tensor.
// Each argument name is bound exactly once ("MeanOut" must not be repeated).
REGISTER_LITE_KERNEL(batch_norm, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::BatchNormCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/batch_norm_compute.h
0 → 100644
浏览文件 @
672d56d7
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Eigen::Map aliases that wrap raw tensor buffers as Eigen arrays.

// Writable 2-D array view; both extents are chosen at run time.
template <typename Scalar>
using EigenArrayMap =
    Eigen::Map<Eigen::Array<Scalar, Eigen::Dynamic, Eigen::Dynamic>>;

// Read-only counterpart of EigenArrayMap.
template <typename Scalar>
using ConstEigenArrayMap =
    Eigen::Map<const Eigen::Array<Scalar, Eigen::Dynamic, Eigen::Dynamic>>;

// Writable 1-D (single column) array view; the length is chosen at run time.
template <typename Scalar>
using EigenVectorArrayMap =
    Eigen::Map<Eigen::Array<Scalar, Eigen::Dynamic, 1>>;

// Read-only counterpart of EigenVectorArrayMap.
template <typename Scalar>
using ConstEigenVectorArrayMap =
    Eigen::Map<const Eigen::Array<Scalar, Eigen::Dynamic, 1>>;
// x86 batch normalization kernel.
//
// Run() reads its arguments from operators::BatchNormParam and writes the
// normalized input to param.y:
//   * If is_test or use_global_stats is set, the Mean / Variance inputs are
//     used as the per-channel statistics.
//   * Otherwise the per-channel mean and variance of the current batch are
//     computed into SavedMean / SavedVariance, and MeanOut / VarianceOut are
//     updated as  old * momentum + batch_stat * (1 - momentum).  The previous
//     contents of MeanOut / VarianceOut are therefore read before being
//     overwritten. After normalization SavedVariance holds
//     1 / sqrt(variance + epsilon) rather than the raw variance.
// Only the NCHW layout is implemented; any other layout ends in LOG(FATAL).
template <typename T>
class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::BatchNormParam;

  void Run() override {
    auto &param = *param_.get_mutable<operators::BatchNormParam>();
    const bool global_stats = param.is_test || param.use_global_stats;

    const auto *x = param.x;
    const auto &x_dims = x->dims();
    CHECK(x_dims.size() >= 2 && x_dims.size() <= 5);
    const int N = x_dims[0];
    const int C = param.data_layout == DATALAYOUT(kNCHW)
                      ? x_dims[1]
                      : x_dims[x_dims.size() - 1];
    // An empty batch or channel dimension would make the division below an
    // integer division by zero, so reject it up front.
    CHECK(N > 0 && C > 0);
    const int sample_size = x->dims().production() / N / C;
    // Number of (sample, channel) columns when x is viewed as a
    // sample_size x (N * C) array.
    const int num_columns = N * C;

    // alloc memory
    param.y->template mutable_data<T>();
    param.mean_out->template mutable_data<T>();
    param.variance_out->template mutable_data<T>();
    param.saved_mean->template mutable_data<T>();
    param.saved_variance->template mutable_data<T>();

    if (!global_stats) {
      // SavedMean / SavedVariance describe only the current batch.
      EigenVectorArrayMap<T> saved_mean_e(
          param.saved_mean->mutable_data<T>(), C);
      EigenVectorArrayMap<T> saved_variance_e(
          param.saved_variance->mutable_data<T>(), C);
      saved_mean_e.setZero();
      saved_variance_e.setZero();

      EigenVectorArrayMap<T> running_mean_arr(
          param.mean_out->mutable_data<T>(), C);
      EigenVectorArrayMap<T> running_var_arr(
          param.variance_out->mutable_data<T>(), C);

      if ((N * sample_size) == 1) {
        // A single element per channel cannot be normalized; copy x to y and
        // leave the running statistics untouched.
        LOG(WARNING) << "Only 1 element in normalization dimension, "
                     << "we skip the batch norm calculation, let y = x.";
        framework::TensorCopy(x->raw_tensor(), platform::CPUPlace(),
                              &param.y->raw_tensor());
        return;
      }

      switch (param.data_layout) {
        case DATALAYOUT(kNCHW): {
          ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, num_columns);
          // Column nc belongs to channel nc % C.
          for (int nc = 0; nc < num_columns; ++nc) {
            saved_mean_e(nc % C) += x_arr.col(nc).sum();
          }
          saved_mean_e /= N * sample_size;
          for (int nc = 0; nc < num_columns; ++nc) {
            saved_variance_e(nc % C) +=
                (x_arr.col(nc) - saved_mean_e(nc % C)).matrix().squaredNorm();
          }
          saved_variance_e /= N * sample_size;
          break;
        }
        default:
          LOG(FATAL) << "Unknown storage order: "
                     << DataLayoutToStr(param.data_layout);
          break;
      }

      // Blend the batch statistics into the running statistics.
      running_mean_arr = running_mean_arr * param.momentum +
                         saved_mean_e * (1. - param.momentum);
      running_var_arr = running_var_arr * param.momentum +
                        saved_variance_e * (1. - param.momentum);
    }

    // Pick the statistics used for normalization and turn the variance into
    // an inverse standard deviation.
    Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
    if (global_stats) {
      ConstEigenVectorArrayMap<T> var_arr(param.variance->data<T>(), C);
      inv_std = (var_arr + param.epsilon).sqrt().inverse();
    } else {
      EigenVectorArrayMap<T> saved_inv_std(
          param.saved_variance->mutable_data<T>(), C);
      // Invert SavedVariance in place first; the gradient will use it too.
      saved_inv_std = (saved_inv_std + param.epsilon).inverse().sqrt();
      inv_std = saved_inv_std;
    }
    ConstEigenVectorArrayMap<T> mean_arr(
        global_stats ? param.mean->data<T>() : param.saved_mean->data<T>(), C);

    //   (x - est_mean) * inv_std * scale + bias
    //   is rewritten as
    //   (x * inv_std * scale) + (bias - est_mean * inv_std * scale)
    // so that each column needs a single multiply-add.
    ConstEigenVectorArrayMap<T> scale_arr(param.scale->data<T>(), C);
    ConstEigenVectorArrayMap<T> bias_arr(param.bias->data<T>(), C);
    Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
    Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
        bias_arr - mean_arr * inv_std * scale_arr;

    switch (param.data_layout) {
      case DATALAYOUT(kNCHW): {
        EigenArrayMap<T> y_arr(param.y->mutable_data<T>(), sample_size,
                               num_columns);
        ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, num_columns);
        for (int nc = 0; nc < num_columns; ++nc) {
          y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
        }
        break;
      }
      default:
        LOG(FATAL) << "Unknown storage order: "
                   << DataLayoutToStr(param.data_layout);
        break;
    }
  }

  virtual ~BatchNormCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/batch_norm_compute_test.cc
0 → 100644
浏览文件 @
672d56d7
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/batch_norm_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// The kernel registry must return at least one non-null "batch_norm" kernel
// for the x86 target with float precision.
TEST(batch_norm_x86, retrive_op) {
  auto kernels =
      KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>(
          "batch_norm");
  ASSERT_FALSE(kernels.empty());
  ASSERT_TRUE(kernels.front());
}
// A default-constructed kernel must report float precision and the x86 target.
TEST(batch_norm_x86, init) {
  BatchNormCompute<float> kernel;
  ASSERT_EQ(kernel.precision(), PRECISION(kFloat));
  ASSERT_EQ(kernel.target(), TARGET(kX86));
}
// Runs the kernel once in training mode (is_test = false,
// use_global_stats = false) on a 2x3x64x64 input and logs every output.
// This is a smoke test: it checks that Run() completes, not the values.
TEST(batch_norm_x86, run_test) {
  lite::Tensor x, scale, bias, mean, variance, y, mean_out, variance_out,
      saved_mean, saved_variance;

  constexpr int batch_size = 2;
  // x and y share the full NCHW shape; every other tensor is per-channel.
  std::vector<int64_t> x_shape{batch_size, 3, 64, 64};
  std::vector<int64_t> channel_shape{3};

  x.Resize(lite::DDim(x_shape));
  y.Resize(lite::DDim(x_shape));
  scale.Resize(lite::DDim(channel_shape));
  bias.Resize(lite::DDim(channel_shape));
  mean.Resize(lite::DDim(channel_shape));
  variance.Resize(lite::DDim(channel_shape));
  mean_out.Resize(lite::DDim(channel_shape));
  variance_out.Resize(lite::DDim(channel_shape));
  saved_mean.Resize(lite::DDim(channel_shape));
  saved_variance.Resize(lite::DDim(channel_shape));

  auto x_data = x.mutable_data<float>();
  auto scale_data = scale.mutable_data<float>();
  auto bias_data = bias.mutable_data<float>();
  auto mean_data = mean.mutable_data<float>();
  auto variance_data = variance.mutable_data<float>();
  auto mean_out_data = mean_out.mutable_data<float>();
  auto variance_out_data = variance_out.mutable_data<float>();
  y.mutable_data<float>();
  saved_mean.mutable_data<float>();
  saved_variance.mutable_data<float>();

  for (int64_t i = 0; i < x.dims().production(); i++) {
    x_data[i] = static_cast<float>(i);
  }
  for (int64_t i = 0; i < scale.dims().production(); i++) {
    scale_data[i] = static_cast<float>(i) * 0.01f + 0.03f;
  }
  for (int64_t i = 0; i < bias.dims().production(); i++) {
    bias_data[i] = static_cast<float>(i) * 0.065f + 0.1f;
  }
  for (int64_t i = 0; i < mean.dims().production(); i++) {
    mean_data[i] = static_cast<float>(i) * 0.0565f;
  }
  for (int64_t i = 0; i < variance.dims().production(); i++) {
    variance_data[i] = static_cast<float>(i) * 2.08f + 1.5f;
  }
  // In training mode the kernel reads MeanOut / VarianceOut as the previous
  // running statistics before updating them, so they must not be left
  // uninitialized. Seed them with the same values as Mean / Variance.
  for (int64_t i = 0; i < mean_out.dims().production(); i++) {
    mean_out_data[i] = mean_data[i];
  }
  for (int64_t i = 0; i < variance_out.dims().production(); i++) {
    variance_out_data[i] = variance_data[i];
  }

  BatchNormCompute<float> batch_norm;
  operators::BatchNormParam param;

  param.x = &x;
  param.is_test = false;
  param.scale = &scale;
  param.bias = &bias;
  param.mean = &mean;
  param.variance = &variance;
  param.use_global_stats = false;
  param.epsilon = 1e-4f;
  param.momentum = 0.9f;
  param.y = &y;
  param.mean_out = &mean_out;
  param.variance_out = &variance_out;
  param.saved_mean = &saved_mean;
  param.saved_variance = &saved_variance;

  batch_norm.SetParam(param);
  batch_norm.Run();

  LOG(INFO) << "output: " << y;
  LOG(INFO) << "mean_out: " << mean_out;
  LOG(INFO) << "variance_out: " << variance_out;
  LOG(INFO) << "saved_mean: " << saved_mean;
  LOG(INFO) << "saved_variance: " << saved_variance;
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Pull the x86 / float / NCHW "def" batch_norm kernel into this test binary.
// NOTE(review): presumably this references the symbol created by
// REGISTER_LITE_KERNEL so the registration is linked in; confirm against
// op_registry.h.
USE_LITE_KERNEL(batch_norm, kX86, kFloat, kNCHW, def);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录