PaddlePaddle / Paddle-Lite
Commit 77734ce7 (unverified), authored Apr 08, 2020 by huzhiqiang, committed via GitHub on Apr 08, 2020

[x86] Fix x86 code style (#3287)

Parent: 720590c9
Showing 63 changed files, with 228 additions and 210 deletions (+228 −210).
Changed files:

lite/api/CMakeLists.txt (+1, -2)
lite/backends/x86/math/beam_search.cc (+2, -2)
lite/backends/x86/math/beam_search_test.cc (+2, -2)
lite/backends/x86/math/blas_impl.h (+3, -3)
lite/backends/x86/math/concat_and_split.cc (+2, -2)
lite/backends/x86/math/cross_entropy.cc (+2, -2)
lite/backends/x86/math/im2col.cc (+3, -3)
lite/backends/x86/math/im2col_cfo_cpu.h (+3, -3)
lite/backends/x86/math/math_function.cc (+2, -2)
lite/backends/x86/math/math_function_impl.h (+3, -3)
lite/backends/x86/math/maxouting.cc (+3, -2)
lite/backends/x86/math/pooling.cc (+11, -7)
lite/backends/x86/math/sample_prob.h (+2, -2)
lite/backends/x86/math/search_fc.cc (+1, -1)
lite/backends/x86/math/selected_rows_functor.cc (+7, -7)
lite/backends/x86/math/sequence2batch.cc (+3, -3)
lite/backends/x86/math/sequence2batch.h (+4, -4)
lite/backends/x86/math/sequence_padding.cc (+4, -4)
lite/backends/x86/math/sequence_padding.h (+5, -5)
lite/backends/x86/math/sequence_pooling.cc (+7, -7)
lite/backends/x86/math/sequence_pooling_test.cc (+4, -4)
lite/backends/x86/math/sequence_scale.cc (+1, -1)
lite/backends/x86/math/sequence_topk_avg_pooling.cc (+2, -2)
lite/backends/x86/math/softmax_impl.h (+5, -5)
lite/backends/x86/math/tree2col.cc (+4, -4)
lite/backends/x86/math/unpooling.cc (+3, -2)
lite/backends/x86/math/vol2col.cc (+2, -2)
lite/fluid/lod.h (+1, -1)
lite/kernels/x86/activation_compute.h (+2, -2)
lite/kernels/x86/attention_padding_mask_compute.h (+6, -5)
lite/kernels/x86/batch_norm_compute.h (+24, -19)
lite/kernels/x86/concat_compute.h (+2, -2)
lite/kernels/x86/conv_compute.h (+3, -3)
lite/kernels/x86/dropout_compute.h (+3, -3)
lite/kernels/x86/elementwise_op_function.h (+14, -11)
lite/kernels/x86/fc_compute.h (+4, -4)
lite/kernels/x86/fill_constant_batch_size_like_compute.h (+2, -2)
lite/kernels/x86/gather_compute.h (+3, -3)
lite/kernels/x86/gru_compute.h (+8, -7)
lite/kernels/x86/layer_norm_compute.h (+7, -7)
lite/kernels/x86/lookup_table_compute.h (+3, -3)
lite/kernels/x86/match_matrix_tensor_compute.cc (+4, -4)
lite/kernels/x86/matmul_compute.h (+1, -1)
lite/kernels/x86/mul_compute.h (+1, -1)
lite/kernels/x86/reduce_compute.h (+1, -1)
lite/kernels/x86/scale_compute.h (+2, -2)
lite/kernels/x86/search_grnn_compute.cc (+1, -1)
lite/kernels/x86/search_group_padding_compute.h (+5, -5)
lite/kernels/x86/search_seq_fc_compute.h (+4, -2)
lite/kernels/x86/sequence_arithmetic_compute.h (+3, -3)
lite/kernels/x86/sequence_concat_compute.h (+2, -2)
lite/kernels/x86/sequence_concat_compute_test.cc (+1, -1)
lite/kernels/x86/sequence_expand_as_compute.h (+8, -7)
lite/kernels/x86/sequence_pool_compute.h (+1, -1)
lite/kernels/x86/sequence_reshape_compute.h (+2, -2)
lite/kernels/x86/shape_compute.h (+1, -1)
lite/kernels/x86/softmax_compute.h (+1, -1)
lite/kernels/x86/squeeze_compute.h (+5, -5)
lite/kernels/x86/stack_compute.h (+2, -2)
lite/kernels/x86/transpose_compute.h (+2, -2)
lite/kernels/x86/uniform_random_compute.cc (+2, -2)
lite/kernels/x86/var_conv_2d_compute.h (+5, -5)
lite/kernels/x86/var_conv_2d_compute_test.cc (+1, -1)
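Before the per-file diffs, note that two mechanical changes account for almost all of the hunks below: calls to member function templates on tensors gain the `template` disambiguator (`t->mutable_data<T>()` becomes `t->template mutable_data<T>()`, likewise for `data<T>()` and `Slice<T>()`), and LoD offset vectors move from `size_t` to the fixed-width `uint64_t`. A minimal sketch of the C++ parsing rule presumably motivating the first change, using a hypothetical `Tensor` stand-in rather than the real lite::Tensor:

#include <vector>

// Toy stand-in for lite::Tensor: a class exposing a member function template.
struct Tensor {
  std::vector<unsigned char> buf;
  template <typename T>
  T* mutable_data() {
    buf.resize(sizeof(T));
    return reinterpret_cast<T*>(buf.data());
  }
};

// Inside a template, when the object's type depends on a template parameter,
// C++ requires the `template` keyword so that `<` is parsed as the start of
// a template argument list rather than as a less-than comparison.
template <typename T, typename TensorT>
T* GetBuffer(TensorT* t) {
  // return t->mutable_data<T>();      // error: parsed as (t->mutable_data < T) > ()
  return t->template mutable_data<T>();  // the form this commit writes
}

int main() {
  Tensor t;
  *GetBuffer<float>(&t) = 1.0f;
  return 0;
}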
lite/api/CMakeLists.txt

@@ -10,6 +10,7 @@ if (LITE_ON_TINY_PUBLISH)
 endif()
+set(light_lib_DEPS light_api paddle_api paddle_api_light)
 if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH_BM OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
     #full api dynamic library
     lite_cc_library(paddle_full_api_shared SHARED SRCS paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc
@@ -264,8 +265,6 @@ if (NOT LITE_ON_TINY_PUBLISH)
         NPU_DEPS ${npu_kernels}
         CL_DEPS ${opencl_kernels}
         FPGA_DEPS ${fpga_kernels}
-        CV_DEPS paddle_cv_arm
-        NPU_DEPS ${npu_kernels}
         BM_DEPS ${bm_kernels}
         )
   # The final inference library for just MobileConfig.
   bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
lite/backends/x86/math/beam_search.cc

@@ -96,8 +96,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
     //                      : nullptr;
     // fill in data
-    std::vector<size_t> low_level;
-    size_t low_offset = 0;
+    std::vector<uint64_t> low_level;
+    uint64_t low_offset = 0;
     for (auto &items : selected_items) {
       low_level.push_back(low_offset);
       for (auto &item : items) {
lite/backends/x86/math/beam_search_test.cc

@@ -22,8 +22,8 @@ void PrepareCPUTensors(paddle::framework::LoDTensor* ids,
                        paddle::framework::LoDTensor* pre_scores) {
   // lod
   paddle::framework::LoD lod;
-  std::vector<size_t> level0({0, 2, 4});
-  std::vector<size_t> level1({0, 1, 2, 3, 4});
+  std::vector<uint64_t> level0({0, 2, 4});
+  std::vector<uint64_t> level1({0, 1, 2, 3, 4});
   lod.push_back(level0);
   lod.push_back(level1);
   ids->set_lod(lod);
lite/backends/x86/math/blas_impl.h

@@ -483,7 +483,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
                  mat_a.data<T>(),
                  mat_b.data<T>(),
                  beta,
-                 mat_out->mutable_data<T>());
+                 mat_out->template mutable_data<T>());
 }

 template <>
@@ -759,7 +759,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
                 mat_a.data<T>(),
                 mat_b.data<T>(),
                 beta,
-                mat_out->mutable_data<T>());
+                mat_out->template mutable_data<T>());
   } else {
     PADDLE_ENFORCE(dim_a.batch_size_ == dim_b.batch_size_ ||
                    dim_a.batch_size_ == 0 || dim_b.batch_size_ == 0);
@@ -773,7 +773,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
                       mat_a.data<T>(),
                       mat_b.data<T>(),
                       beta,
-                      mat_out->mutable_data<T>(),
+                      mat_out->template mutable_data<T>(),
                       dim_a.batch_size_ == 0 ? dim_b.batch_size_
                                              : dim_a.batch_size_,
                       dim_a.stride_,
                       dim_b.stride_);
lite/backends/x86/math/concat_and_split.cc

@@ -51,7 +51,7 @@ class ConcatFunctor<lite::TargetType::kX86, T> {
     // auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());

     // computation
-    auto output_data = output->mutable_data<T>();
+    auto output_data = output->template mutable_data<T>();
     int col_idx = 0;
     for (int j = 0; j < num; ++j) {
       int col_len = input_cols[j];
@@ -108,7 +108,7 @@ class SplitFunctor<lite::TargetType::kX86, T> {
       int col_len = output_cols[j];
       auto* out_tensor = outputs->at(j);
       if (out_tensor != nullptr) {
-        T* dst_ptr = out_tensor->mutable_data<T>() + k * col_len;
+        T* dst_ptr = out_tensor->template mutable_data<T>() + k * col_len;
         std::copy_n(src_ptr + col_idx, col_len, dst_ptr);
         // memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
         //              sizeof(T) * col_len);
lite/backends/x86/math/cross_entropy.cc

@@ -50,8 +50,8 @@ class CrossEntropyFunctor<lite::TargetType::kX86, T> {
                        .reshape(batch_axis_remain)
                        .sum(Eigen::DSizes<int, 1>(1)));
     } else {
-      const T* prob_data = prob->data<T>();
-      T* loss_data = out->mutable_data<T>();
+      const T* prob_data = prob->template data<T>();
+      T* loss_data = out->template mutable_data<T>();
       const int64_t* label_data = labels->data<int64_t>();
       for (int i = 0; i < batch_size; ++i) {
lite/backends/x86/math/im2col.cc

@@ -99,7 +99,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kCFO,
     int channels_col = im_channels * filter_height * filter_width;

-    T* im_data = im->mutable_data<T>();
+    T* im_data = im->template mutable_data<T>();
     const T* col_data = col.data<T>();
     for (int c = 0; c < channels_col; ++c) {
@@ -161,7 +161,7 @@ class Im2ColFunctor<lite::x86::math::ColFormat::kOCF,
     int col_width = col->dims()[1];
     const T* im_data = im.data<T>();
-    T* col_data = col->mutable_data<T>();
+    T* col_data = col->template mutable_data<T>();
     for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
       for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
@@ -235,7 +235,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kOCF,
                    "col_width and padding(padding_left, padding_right) are "
                    "inconsistent.");
-    T* im_data = im->mutable_data<T>();
+    T* im_data = im->template mutable_data<T>();
     const T* col_data = col.data<T>();
     for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
lite/backends/x86/math/im2col_cfo_cpu.h

@@ -42,7 +42,7 @@ inline void im2col_common(const lite::Tensor& im,
   int channels_col = im_channels * filter_height * filter_width;

   const T* im_data = im.data<T>();
-  T* col_data = col->mutable_data<T>();
+  T* col_data = col->template mutable_data<T>();
   for (int c = 0; c < channels_col; ++c) {
     int w_offset = c % filter_width;
     int h_offset = (c / filter_width) % filter_height;
@@ -77,7 +77,7 @@ inline void im2col_sh1sw1dh1dw1ph0pw0(const lite::Tensor& im,
   int output_width = col->dims()[4];

   const T* im_data = im.data<T>();
-  T* col_data = col->mutable_data<T>();
+  T* col_data = col->template mutable_data<T>();
   int col_matrix_width = output_width * output_height;
   int im_size = im_height * im_width;
   size_t copy_size = sizeof(T) * output_width;
@@ -123,7 +123,7 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const lite::Tensor& im,
   constexpr int prw = 1;

   const T* im_data = im.data<T>();
-  T* col_data = col->mutable_data<T>();
+  T* col_data = col->template mutable_data<T>();
   int im_size = im_height * im_width;
   int col_matrix_width = output_width * output_height;
   int col_block_fh = filter_width * col_matrix_width;  // fw*oh*ow
lite/backends/x86/math/math_function.cc

@@ -65,7 +65,7 @@ struct TensorSetConstantCPU {
       : tensor_(tensor), value_(value) {}
   template <typename T>
   void apply() const {
-    auto* begin = tensor_->mutable_data<T>(lite::TargetType::kX86);
+    auto* begin = tensor_->template mutable_data<T>(lite::TargetType::kX86);
     std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
   }
   lite::Tensor* tensor_;
@@ -126,7 +126,7 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
     const T* input_data = input.data<T>();
     const T* vector_data = vector.data<T>();
-    T* output_data = output->mutable_data<T>();
+    T* output_data = output->template mutable_data<T>();
     for (int64_t i = 0; i < in_dims[0]; ++i) {
       for (int64_t j = 0; j < size; ++j) {
         output_data[i * in_dims[0] + j] =
lite/backends/x86/math/math_function_impl.h

@@ -83,7 +83,7 @@ class ColwiseSum<lite::TargetType::kX86, T> {
     auto size = in_dims[1];
     PADDLE_ENFORCE_EQ(out->numel(), size);

-    T* out_buf = out->mutable_data<T>(out->target());
+    T* out_buf = out->template mutable_data<T>(out->target());
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -129,7 +129,7 @@ class RowwiseMean<lite::TargetType::kX86, T> {
     auto size = in_dims[1];
     PADDLE_ENFORCE_EQ(out->numel(), height);
     auto inv_size = 1.0 / size;
-    T* out_buf = out->mutable_data<T>(out->target());
+    T* out_buf = out->template mutable_data<T>(out->target());
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -173,7 +173,7 @@ class RowwiseSum<lite::TargetType::kX86, T> {
     auto size = in_dims[1];
     PADDLE_ENFORCE_EQ(out->numel(), height);

-    T* out_buf = out->mutable_data<T>(out->target());
+    T* out_buf = out->template mutable_data<T>(out->target());
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
lite/backends/x86/math/maxouting.cc

@@ -35,7 +35,7 @@ class MaxOutFunctor<lite::TargetType::kX86, T> {
     // c_size means the output size of each sample
     int c_size = fea_size * output_channels;
     const T* input_data = input.data<T>();
-    T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+    T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
     for (int i = 0; i < batch_size; ++i) {
       int new_bindex = c_size * i;
@@ -72,7 +72,8 @@ class MaxOutGradFunctor<lite::TargetType::kX86, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     for (int i = 0; i < batch_size; ++i) {
       int blen = fea_size * output_channels * i;
lite/backends/x86/math/pooling.cc

@@ -54,8 +54,8 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
     const int input_stride = input_height * input_width;
     const int output_stride = output_height * output_width;
-    const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+    const T* input_data = input->template data<T>();
+    T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
     int hstart, hend;
     int wstart, wend;
@@ -137,7 +137,8 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     int hstart, hend;
     int wstart, wend;
@@ -220,7 +221,8 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     for (int i = 0; i < batch_size; i++) {
       for (int c = 0; c < output_channels; ++c) {
@@ -322,7 +324,7 @@ class Pool3dFunctor<lite::TargetType::kX86, PoolProcess, T> {
     const int output_stride = output_depth * output_height * output_width;
     const T* input_data = input.data<T>();
-    T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+    T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
     int dstart, dend;
     int hstart, hend;
@@ -425,7 +427,8 @@ class Pool3dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     int dstart, dend;
     int hstart, hend;
@@ -530,7 +533,8 @@ class MaxPool3dGradFunctor<lite::TargetType::kX86, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     for (int i = 0; i < batch_size; i++) {
       for (int c = 0; c < output_channels; ++c) {
lite/backends/x86/math/sample_prob.h

@@ -58,11 +58,11 @@ class SampleWithProb {
     const int64_t* label_data = L->data<int64_t>();
     // int64_t* samples_data =
     //     S->mutable_data<int64_t>(ret_dim, Target);
-    // T* probabilities_data = P->mutable_data<T>(ret_dim, Target);
+    // T* probabilities_data = P->template mutable_data<T>(ret_dim, Target);
     S->Resize({batch_size, num_sampled_classes});
     auto* samples_data = S->mutable_data<int64_t>(Target);
     P->Resize({batch_size, num_sampled_classes});
-    auto* probabilities_data = P->mutable_data<T>(Target);
+    auto* probabilities_data = P->template mutable_data<T>(Target);

     // temp sets for unique sampling
     std::unordered_set<int64_t> tmp_samples;
lite/backends/x86/math/search_fc.cc

@@ -42,7 +42,7 @@ class SearchFcFunctor<lite::TargetType::kX86, T> {
     lite::DDim dims(std::vector<int64_t>({bottom.dims()[0], out_size}));
     const auto bottom_data = bottom.data<T>();
-    auto top_data = top->mutable_data<T>(lite::TargetType::kX86);
+    auto top_data = top->template mutable_data<T>(lite::TargetType::kX86);
     const auto weights = w.data<T>();
     auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
     call_gemm<lite::X86Context, T>(blas,
lite/backends/x86/math/selected_rows_functor.cc

@@ -52,7 +52,7 @@ struct SelectedRowsAdd<lite::TargetType::kX86, T> {
     PADDLE_ENFORCE_EQ(in1_row_numel, in2_value.numel() / in2_rows.size());
     PADDLE_ENFORCE_EQ(in1_row_numel, out_value->numel() / out_rows.size());

-    auto* out_data = out_value->mutable_data<T>();
+    auto* out_data = out_value->template mutable_data<T>();
     auto* in1_data = in1_value.data<T>();
     std::copy_n(in1_data, in1_value.numel(), out_data);
@@ -87,7 +87,7 @@ struct SelectedRowsAddTensor<lite::TargetType::kX86, T> {
     functor(context, output, 0.0);

     auto* in1_data = in1_value.data<T>();
-    auto* out_data = output->mutable_data<T>();
+    auto* out_data = output->template mutable_data<T>();

     for (size_t i = 0; i < in1_rows.size(); i++) {
       for (int64_t j = 0; j < in1_row_numel; j++) {
@@ -127,7 +127,7 @@ struct SelectedRowsAddTo<lite::TargetType::kX86, T> {
     in2_rows.insert(in2_rows.end(), in1_rows.begin(), in1_rows.end());

     auto* in1_data = in1_value.data<T>();
-    auto* in2_data = in2_value->mutable_data<T>();
+    auto* in2_data = in2_value->template mutable_data<T>();
     std::copy_n(in1_data, in1_value.numel(), in2_data + input2_offset);
   }
 };
@@ -161,7 +161,7 @@ struct SelectedRowsSumTo<lite::TargetType::kX86, T> {
     input2->set_rows(in2_rows);

     auto* in2_value = input2->mutable_value();
-    T* in2_data = in2_value->mutable_data<T>();
+    T* in2_data = in2_value->template mutable_data<T>();
     auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
     size_t offset = 0u;
     for (size_t i = 0u; i != input1.size(); ++i) {
@@ -194,7 +194,7 @@ struct SelectedRowsAddToTensor<lite::TargetType::kX86, T> {
     PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);

     auto* in1_data = in1_value.data<T>();
-    auto* input2_data = input2->mutable_data<T>();
+    auto* input2_data = input2->template mutable_data<T>();

     for (size_t i = 0; i < in1_rows.size(); i++) {
       for (int64_t j = 0; j < in1_row_numel; j++) {
@@ -305,7 +305,7 @@ struct MergeAdd<lite::TargetType::kX86, T> {
     lite::DDim dims(std::vector<int64_t>(
         {static_cast<int64_t>(merged_row_set.size()), input_width}));
     out.mutable_value()->Resize(dims);
-    auto* out_data = out.mutable_value()->mutable_data<T>();
+    auto* out_data = out.mutable_value()->template mutable_data<T>();

     if (merged_row_set.size() == row_num && !sorted_result) {
       // no duplicated ids, just concat the result together
@@ -385,7 +385,7 @@ struct UpdateToTensor<lite::TargetType::kX86, T> {
     PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);

     auto* in1_data = in1_value.data<T>();
-    auto* input2_data = input2->data<T>();
+    auto* input2_data = input2->template data<T>();

     // FIXME(typhoonzero): use macro fix the below messy code.
     switch (op) {
lite/backends/x86/math/sequence2batch.cc

@@ -24,10 +24,10 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
  public:
   void operator()(const lite::Context<lite::TargetType::kX86>& context,
                   const lite::Tensor& src,
-                  const std::vector<size_t>& index_lod,
+                  const std::vector<uint64_t>& index_lod,
                   lite::Tensor* dst,
                   bool is_src_index) {
-    const size_t* index = index_lod.data();
+    const uint64_t* index = index_lod.data();
     const auto& src_dims = src.dims();
     const auto& dst_dims = dst->dims();
     PADDLE_ENFORCE_EQ(
@@ -39,7 +39,7 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
     auto height = dst_dims[0];
     auto width = dst_dims[1];
     auto* src_data = src.data<T>();
-    auto* dst_data = dst->mutable_data<T>();
+    auto* dst_data = dst->template mutable_data<T>();
     const int sz = width * sizeof(T);
     if (is_src_index) {
       for (int i = 0; i < height; ++i) {
lite/backends/x86/math/sequence2batch.h

@@ -36,7 +36,7 @@ class CopyMatrixRowsFunctor {
   // The indexed rows are based on the input index.
   void operator()(const lite::Context<Target>& context,
                   const lite::Tensor& src,
-                  const std::vector<size_t>& index_lod,
+                  const std::vector<uint64_t>& index_lod,
                   lite::Tensor* dst,
                   bool is_src_index);
 };
@@ -130,8 +130,8 @@ class LoDTensor2BatchFunctor {
     // batch_lods[2] is the sort order for the input LoDTensor.
     batch_lods->at(2).resize(seq_info.size());

-    size_t* batch_starts = batch_lods->at(0).data();
-    size_t* seq2batch_idx = batch_lods->at(1).data();
+    auto* batch_starts = batch_lods->at(0).data();
+    auto* seq2batch_idx = batch_lods->at(1).data();
     batch_starts[0] = 0;
     for (int n = 0; n < max_seqlen; n++) {
       auto batch_id = static_cast<int>(batch_starts[n]);
@@ -148,7 +148,7 @@ class LoDTensor2BatchFunctor {
       }
       batch_starts[n + 1] = static_cast<size_t>(batch_id);
     }
-    size_t* seq_order = batch_lods->at(2).data();
+    auto* seq_order = batch_lods->at(2).data();
     for (size_t i = 0; i < seq_info.size(); ++i) {
       seq_order[i] = seq_info[i].seq_idx;
     }
lite/backends/x86/math/sequence_padding.cc

@@ -22,15 +22,15 @@ namespace math {
 template <typename T>
 void CopyValidData(lite::Tensor* dst_tensor,
                    const lite::Tensor* src_tensor,
-                   const std::vector<size_t>& seq_offsets,
+                   const std::vector<uint64_t>& seq_offsets,
                    int pad_seq_len,
                    int step_width,
                    bool norm_by_len,
                    CopyType type,
                    PadLayout layout) {
   int seq_num = seq_offsets.size() - 1;
-  const T* src_data = src_tensor->data<T>();
-  T* dst_data = dst_tensor->mutable_data<T>();
+  const T* src_data = src_tensor->template data<T>();
+  T* dst_data = dst_tensor->template mutable_data<T>();

   int seq_cpy_gap = step_width;
   int pad_cpy_gap =
@@ -113,7 +113,7 @@ class PaddingLoDTensorFunctor<lite::TargetType::kX86, T> {
                       "'step_width'.");

     // fill padding value
-    T* pad_data = pad_tensor->mutable_data<T>();
+    T* pad_data = pad_tensor->template mutable_data<T>();
     const T* pad_value_data = pad_value.data<T>();
     if (pad_value.numel() == 1) {
       fast_mem_init<T>(
lite/backends/x86/math/sequence_padding.h

@@ -30,10 +30,10 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };

 enum CopyType { kSeqToPad, kPadToSeq };

-inline static size_t MaximumSequenceLength(
-    const std::vector<size_t>& seq_offset) {
-  size_t seq_num = seq_offset.size() - 1;
-  size_t max_seq_len = 0;
+inline static uint64_t MaximumSequenceLength(
+    const std::vector<uint64_t>& seq_offset) {
+  uint64_t seq_num = seq_offset.size() - 1;
+  uint64_t max_seq_len = 0;
   for (size_t i = 0; i < seq_num; ++i) {
     max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
   }
@@ -42,7 +42,7 @@ inline static size_t MaximumSequenceLength(
 inline static void CheckDims(const lite::DDim& seq_tensor_dims,
                              const lite::DDim& pad_tensor_dims,
-                             const std::vector<size_t>& seq_offset,
+                             const std::vector<uint64_t>& seq_offset,
                              int64_t padded_seq_len,
                              int64_t step_width,
                              const PadLayout& layout) {
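A quick sanity check of the rewritten helper above (its body is repeated here so the snippet is self-contained; the usage example is ours, not part of the patch): offsets {0, 2, 7, 10} describe three sequences of lengths 2, 5 and 3, so the maximum is 5.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

inline static uint64_t MaximumSequenceLength(
    const std::vector<uint64_t>& seq_offset) {
  uint64_t seq_num = seq_offset.size() - 1;  // n+1 offsets delimit n sequences
  uint64_t max_seq_len = 0;
  for (size_t i = 0; i < seq_num; ++i) {
    max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
  }
  return max_seq_len;
}

int main() {
  assert(MaximumSequenceLength({0, 2, 7, 10}) == 5);
  return 0;
}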
lite/backends/x86/math/sequence_pooling.cc

@@ -55,7 +55,7 @@ class MaxSeqPoolFunctor {
     auto starts = input.lod()[0];
     const T* in_data = input.data<T>();
-    T* out_data = output->mutable_data<T>();
+    T* out_data = output->template mutable_data<T>();
     int* max_index = index->mutable_data<int>();

     int64_t num_seq = out_dims[0];
@@ -103,7 +103,7 @@ class MaxSeqPoolFunctor<T, true> {
     auto starts = input.lod()[0];
     const T* in_data = input.data<T>();
-    T* out_data = output->mutable_data<T>();
+    T* out_data = output->template mutable_data<T>();

     int64_t num_seq = out_dims[0];
     int64_t dim = output->numel() / num_seq;
@@ -145,7 +145,7 @@ class MaxSeqPoolGradFunctor {
     const T* og_data = out_grad.data<T>();
     const int* max_index = index.data<int>();
-    T* ig_data = in_grad->mutable_data<T>();
+    T* ig_data = in_grad->template mutable_data<T>();

     SetConstant<TARGET(kX86), T> set_zero;
     set_zero(context, in_grad, static_cast<T>(0.0));
@@ -170,7 +170,7 @@ class LastSeqPoolFunctor {
                   lite::Tensor* output) {
     // Create pointers to input and output data
     auto* in_data = input.data<T>();
-    auto* out_data = output->mutable_data<T>();
+    auto* out_data = output->template mutable_data<T>();

     // Calculate the size of each item in sequence
     int64_t item_size = input.numel() / input.dims()[0];
@@ -203,7 +203,7 @@ class FirstSeqPoolFunctor {
                   lite::Tensor* output) {
     // Create pointers to input and output data
     auto* in_data = input.data<T>();
-    auto* out_data = output->mutable_data<T>();
+    auto* out_data = output->template mutable_data<T>();

     // Calculate the size of each item in sequence
     int64_t item_size = input.numel() / input.dims()[0];
@@ -238,7 +238,7 @@ class SumSeqPoolGradFunctor {
     int64_t in_w = in_grad->numel() / in_grad->dims()[0];
     PADDLE_ENFORCE(in_w == out_w);
     const T* out_g_data = out_grad.data<T>();
-    T* in_g_data = in_grad->mutable_data<T>(TARGET(kX86));
+    T* in_g_data = in_grad->template mutable_data<T>(TARGET(kX86));
     auto blas = math::GetBlas<TARGET(kX86), T>(context);
     for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
       int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
@@ -288,7 +288,7 @@ class SequencePoolFunctor<TARGET(kX86), T> {
     auto lod = input.lod()[0];
     if (pooltype == "SUM") {
       const T* src = input.data<T>();
-      T* dst = output->mutable_data<T>(TARGET(kX86));
+      T* dst = output->template mutable_data<T>(TARGET(kX86));
       jit::seq_pool_attr_t attr(
           static_cast<int>(input.numel() / input.dims()[0]),
           jit::SeqPoolType::kSum);
lite/backends/x86/math/sequence_pooling_test.cc

@@ -101,13 +101,13 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {

 TEST(SequencePoolingGrad, CPU_SUM) {
   paddle::framework::LoD lod1;
-  lod1.push_back(std::vector<size_t>{0, 10});
+  lod1.push_back(std::vector<uint64_t>{0, 10});
   TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
                          paddle::platform::CPUPlace,
                          float>(lod1);

   paddle::framework::LoD lod2;
-  lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
+  lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
   TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
                          paddle::platform::CPUPlace,
                          float>(lod2);
@@ -116,13 +116,13 @@ TEST(SequencePoolingGrad, CPU_SUM) {
 #ifdef PADDLE_WITH_CUDA
 TEST(SequencePoolingGrad, CUDA_SUM) {
   paddle::framework::LoD lod1;
-  lod1.push_back(std::vector<size_t>{0, 10});
+  lod1.push_back(std::vector<uint64_t>{0, 10});
   TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
                          paddle::platform::CUDAPlace,
                          float>(lod1);

   paddle::framework::LoD lod2;
-  lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
+  lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
   TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
                          paddle::platform::CUDAPlace,
                          float>(lod2);
lite/backends/x86/math/sequence_scale.cc

@@ -32,7 +32,7 @@ class ScaleLoDTensorFunctor<lite::TargetType::kX86, T> {
     size_t seq_width = seq->dims()[1];
     lite::LoD abs_offset_lod = lite::fluid::ToAbsOffset(lod);

-    T* seq_data = seq->mutable_data<T>(lite::TargetType::kX86);
+    T* seq_data = seq->template mutable_data<T>(lite::TargetType::kX86);
     for (size_t i = 0; i < num_seq; ++i) {
       for (size_t j = lod[level][i] * seq_width;
            j < lod[level][i + 1] * seq_width;
lite/backends/x86/math/sequence_topk_avg_pooling.cc

@@ -83,7 +83,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
     auto pos_data = pos->mutable_data<int>(lite::TargetType::kX86);

     int offset = 0;
-    std::vector<size_t> vec_out_lod;
+    std::vector<uint64_t> vec_out_lod;
     vec_out_lod.reserve(batch_size + 1);
     for (int i = 0; i <= batch_size; ++i) {
       offset = row_lod[i];
@@ -95,7 +95,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
     out->set_lod(lod_temp);

     auto in_data = in.data<T>();
-    auto out_data = out->mutable_data<T>(lite::TargetType::kX86);
+    auto out_data = out->template mutable_data<T>(lite::TargetType::kX86);

     T* sum_data = new T[max_k];
     for (int i = 0; i < batch_size; ++i) {
lite/backends/x86/math/softmax_impl.h

@@ -108,8 +108,8 @@ class SoftmaxFunctor<Target, T, is_test, enable_if_CPU<Target>> {
     const int num_remain = num_classes / axis_dim;

     if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
-      const T* in_data = X->data<T>();
-      auto* out_data = Y->mutable_data<T>();
+      const T* in_data = X->template data<T>();
+      auto* out_data = Y->template mutable_data<T>();
       for (int bs = 0; bs < batch_size; ++bs) {
         T max_val = *std::max_element(in_data, in_data + num_classes);
         max_val *= static_cast<T>(-1);
@@ -219,9 +219,9 @@ class SoftmaxGradFunctor<Target, T, enable_if_CPU<Target>> {
     const int num_remain = num_classes / axis_dim;

     if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
-      const T* out_data = y->data<T>();
-      const T* out_grad = y_grad->data<T>();
-      T* in_grad = x_grad->mutable_data<T>();
+      const T* out_data = y->template data<T>();
+      const T* out_grad = y_grad->template data<T>();
+      T* in_grad = x_grad->template mutable_data<T>();
       for (int bs = 0; bs < batch_size; ++bs) {
         T scalar;
         vec_mul_reduce<T, lite::x86::avx>(
lite/backends/x86/math/tree2col.cc

@@ -104,12 +104,12 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
     patch_size = processing_list.size();

     //    T *patch_data =
-    //        patch->mutable_data<T>({static_cast<int64_t>(patch_size),
+    //        patch->template mutable_data<T>({static_cast<int64_t>(patch_size),
     //                                static_cast<int64_t>(patch_elem_size)},
     //                               cpu_place);
     patch->Resize({static_cast<int64_t>(patch_size),
                    static_cast<int64_t>(patch_elem_size)});
-    auto* patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
+    auto* patch_data = patch->template mutable_data<T>(lite::TargetType::kX86);
     constant(context, patch, 0);
     const T* features = node_features.data<T>();
@@ -166,12 +166,12 @@ class Col2TreeFunctor<lite::TargetType::kX86, T> {
       }
     }
     //    T *grad_data =
-    //        in_grad->mutable_data<T>({static_cast<int64_t>(node_count),
+    //        in_grad->template mutable_data<T>({static_cast<int64_t>(node_count),
     //                                  static_cast<int64_t>(grad_elem_size)},
     //                                 cpu_place);
     in_grad->Resize({static_cast<int64_t>(node_count),
                      static_cast<int64_t>(grad_elem_size)});
-    auto* grad_data = in_grad->mutable_data<T>(lite::TargetType::kX86);
+    auto* grad_data = in_grad->template mutable_data<T>(lite::TargetType::kX86);
     constant(context, in_grad, 0);
     const T* out_g = out_grad.data<T>();
lite/backends/x86/math/unpooling.cc

@@ -36,7 +36,7 @@ class Unpool2dMaxFunctor<lite::TargetType::kX86, T> {
     int output_feasize = output_height * output_width;
     const T* input_data = input.data<T>();
     const int* indices_data = indices.data<int>();
-    T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+    T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
     for (int b = 0; b < batch_size; ++b) {
       for (int c = 0; c < output_channels; ++c) {
         for (int i = 0; i < input_feasize; ++i) {
@@ -70,7 +70,8 @@ class Unpool2dMaxGradFunctor<lite::TargetType::kX86, T> {
     int output_feasize = output_height * output_width;
     const int* indices_data = indices.data<int>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+    T* input_grad_data =
+        input_grad->template mutable_data<T>(lite::TargetType::kX86);
     for (int b = 0; b < batch_size; ++b) {
       for (int c = 0; c < output_channels; ++c) {
lite/backends/x86/math/vol2col.cc

@@ -75,7 +75,7 @@ class Vol2ColFunctor<lite::TargetType::kX86, T> {
                       "mismatching.");
     const T* vol_data = vol.data<T>();
-    T* col_data = col->mutable_data<T>();
+    T* col_data = col->template mutable_data<T>();

     for (int c = 0; c < channels_col; ++c) {
       int w_offset = c % filter_width;
@@ -159,7 +159,7 @@ class Col2VolFunctor<lite::TargetType::kX86, T> {
                       output_width,
                       "input_width and output_width are "
                       "mismatching.");
-    T* vol_data = vol->mutable_data<T>();
+    T* vol_data = vol->template mutable_data<T>();
     const T* col_data = col.data<T>();

     for (int c = 0; c < channels_col; ++c) {
lite/fluid/lod.h

@@ -19,7 +19,7 @@
 namespace paddle {
 namespace lite {
 namespace fluid {

-using LoD = std::vector<std::vector<size_t>>;
+using LoD = std::vector<std::vector<uint64_t>>;

 static LoD ToAbsOffset(const LoD& in) {
   // the lowest level stores relative offsets
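The one-line change above fixes the width of LoD (level-of-detail) offsets: on 32-bit targets size_t is 32 bits, so uint64_t keeps offsets the same size on every platform. A simplified sketch of how ToAbsOffset walks such a table, written under the new alias (this is our paraphrase of the function named in the context line, not the verbatim implementation):

#include <cstdint>
#include <vector>

using LoD = std::vector<std::vector<uint64_t>>;

// The lowest level stores relative offsets; every higher level indexes into
// the level below it. Converting to absolute offsets walks bottom-up, so by
// the time a level is rewritten, the level beneath it is already absolute.
inline LoD ToAbsOffsetSketch(const LoD& in) {
  if (in.size() < 2) return in;
  LoD result = in;
  for (int level = static_cast<int>(in.size()) - 2; level >= 0; --level) {
    for (auto& offset : result[level]) {
      offset = result[level + 1][offset];
    }
  }
  return result;
}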
lite/kernels/x86/activation_compute.h

@@ -231,8 +231,8 @@ class SoftsignCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     // auto& context = ctx_->As<X86Context>();
     auto& param = *param_.get_mutable<operators::ActivationParam>();

-    const T* x_data = param.X->data<T>();
-    T* out_data = param.Out->mutable_data<T>();
+    const T* x_data = param.X->template data<T>();
+    T* out_data = param.Out->template mutable_data<T>();
     size_t x_size = param.X->numel();
     for (size_t i = 0; i < x_size; i++) {
       out_data[i] = x_data[i] / (static_cast<T>(1) + std::abs(x_data[i]));
lite/kernels/x86/attention_padding_mask_compute.h

@@ -45,9 +45,9 @@ class AttentionPaddingMaskCompute
     auto src_len = static_cast<int64_t>(bottom1->lod()[0][1]);
     const int att_batch = bottom0->lod()[0].size() - 1;
     const int src_batch = bottom1->lod()[0].size() - 1;
-    int* pad_begin = _pad_begin->mutable_data<int>();
+    int* pad_begin = _pad_begin->template mutable_data<int>();
     for (int i = 0; i < src_batch; ++i) {
-      const auto* src_data = bottom1->data<T>() + src_len * i;
+      const auto* src_data = bottom1->template data<T>() + src_len * i;
       int index = src_len - 1;
       for (; index >= 0 && _pad_id == static_cast<int>(src_data[index]);
            --index) {
@@ -56,13 +56,14 @@ class AttentionPaddingMaskCompute
     }

     const auto att_len = static_cast<int64_t>(bottom0->lod()[0][1]);
-    auto* top_data = top->mutable_data<T>();
+    auto* top_data = top->template mutable_data<T>();
     memcpy(top_data,
-           bottom0->data<T>(),
+           bottom0->template data<T>(),
            bottom0->dims()[0] * bottom0->dims()[1] * sizeof(T));
     for (int i = 0; i < att_batch; ++i) {
       for (int j = 0; j < att_len; ++j) {
-        top_data = top->mutable_data<T>() + src_len * (att_len * i + j);
+        top_data =
+            top->template mutable_data<T>() + src_len * (att_len * i + j);
         int src_idx = i % src_batch;
         for (int k = pad_begin[src_idx]; k < src_len; ++k) {
           top_data[k] = _mask;
lite/kernels/x86/batch_norm_compute.h

@@ -59,26 +59,26 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     const int sample_size = x->dims().production() / N / C;

     // alloc memory
-    param.y->mutable_data<T>();
+    param.y->template mutable_data<T>();
     if (!param.is_test) {
-      param.mean_out->mutable_data<T>();
-      param.variance_out->mutable_data<T>();
-      param.saved_mean->mutable_data<T>();
-      param.saved_variance->mutable_data<T>();
+      param.mean_out->template mutable_data<T>();
+      param.variance_out->template mutable_data<T>();
+      param.saved_mean->template mutable_data<T>();
+      param.saved_variance->template mutable_data<T>();
     }
     if (!global_stats) {
       // saved_xx is use just in this batch of data
-      EigenVectorArrayMap<T> saved_mean_e(param.saved_mean->mutable_data<T>(),
-                                          C);
+      EigenVectorArrayMap<T> saved_mean_e(
+          param.saved_mean->template mutable_data<T>(), C);
       EigenVectorArrayMap<T> saved_variance_e(
-          param.saved_variance->mutable_data<T>(), C);
+          param.saved_variance->template mutable_data<T>(), C);
       saved_mean_e.setZero();
       saved_variance_e.setZero();

-      EigenVectorArrayMap<T> running_mean_arr(param.mean_out->mutable_data<T>(),
-                                              C);
+      EigenVectorArrayMap<T> running_mean_arr(
+          param.mean_out->template mutable_data<T>(), C);
       EigenVectorArrayMap<T> running_var_arr(
-          param.variance_out->mutable_data<T>(), C);
+          param.variance_out->template mutable_data<T>(), C);
       if ((N * sample_size) == 1) {
         LOG(WARNING) << "Only 1 element in normalization dimension, "
@@ -89,7 +89,8 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
       switch (param.data_layout) {
         case DATALAYOUT(kNCHW): {
-          ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
+          ConstEigenArrayMap<T> x_arr(
+              x->template data<T>(), sample_size, N * C);
           for (int nc = 0; nc < N * C; ++nc) {
             saved_mean_e(nc % C) += x_arr.col(nc).sum();
           }
@@ -115,33 +116,37 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     // use SavedMean and SavedVariance to do normalize
     Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
     if (global_stats) {
-      ConstEigenVectorArrayMap<T> var_arr(param.variance->data<T>(), C);
+      ConstEigenVectorArrayMap<T> var_arr(
+          param.variance->template data<T>(), C);
       inv_std = (var_arr + param.epsilon).sqrt().inverse();
     } else {
       EigenVectorArrayMap<T> saved_inv_std(
-          param.saved_variance->mutable_data<T>(), C);
+          param.saved_variance->template mutable_data<T>(), C);
       // inverse SavedVariance first, gradient will use it too.
       saved_inv_std = (saved_inv_std + param.epsilon).inverse().sqrt();
       inv_std = saved_inv_std;
     }
     ConstEigenVectorArrayMap<T> mean_arr(
-        global_stats ? param.mean->data<T>() : param.saved_mean->data<T>(),
-        C);
+        global_stats ? param.mean->template data<T>()
+                     : param.saved_mean->template data<T>(),
+        C);
     // ((x - est_mean) * (inv_var) * scale + bias
     // formula transform ====>
     // (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
-    ConstEigenVectorArrayMap<T> scale_arr(param.scale->data<T>(), C);
-    ConstEigenVectorArrayMap<T> bias_arr(param.bias->data<T>(), C);
+    ConstEigenVectorArrayMap<T> scale_arr(param.scale->template data<T>(), C);
+    ConstEigenVectorArrayMap<T> bias_arr(param.bias->template data<T>(), C);
     Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
     Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
         bias_arr - mean_arr * inv_std * scale_arr;
     switch (param.data_layout) {
       case DATALAYOUT(kNCHW): {
-        EigenArrayMap<T> y_arr(param.y->mutable_data<T>(), sample_size, N * C);
-        ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
+        EigenArrayMap<T> y_arr(
+            param.y->template mutable_data<T>(), sample_size, N * C);
+        ConstEigenArrayMap<T> x_arr(
+            x->template data<T>(), sample_size, N * C);
         for (int nc = 0; nc < N * C; ++nc) {
           y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
         }
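For readers unfamiliar with the Eigen maps this kernel leans on: EigenArrayMap and its const/vector variants view raw tensor memory as Eigen arrays without copying, which is why every mutable_data<T>() call above feeds straight into a map constructor. A self-contained sketch of the NCHW transform step (the type aliases are assumed here for illustration; the real ones live elsewhere in Paddle-Lite's headers):

#include <Eigen/Core>

template <typename T>
using EigenArrayMap =
    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using ConstEigenArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;

// y = x * new_scale[c] + new_bias[c], where new_scale/new_bias already fold
// in mean, variance, epsilon, scale and bias, mirroring the kernel above.
template <typename T>
void BatchNormNCHW(const T* x, T* y, const T* new_scale, const T* new_bias,
                   int sample_size, int N, int C) {
  ConstEigenArrayMap<T> x_arr(x, sample_size, N * C);
  EigenArrayMap<T> y_arr(y, sample_size, N * C);
  for (int nc = 0; nc < N * C; ++nc) {
    // Column nc holds one (n, c) plane; c cycles fastest, hence nc % C.
    y_arr.col(nc) = x_arr.col(nc) * new_scale[nc % C] + new_bias[nc % C];
  }
}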
lite/kernels/x86/concat_compute.h

@@ -47,7 +47,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     int64_t axis = static_cast<int64_t>(param.axis);
     auto* axis_tensor = param.axis_tensor;
     if (axis_tensor != nullptr) {
-      auto* axis_tensor_data = axis_tensor->data<int>();
+      auto* axis_tensor_data = axis_tensor->template data<int>();
       axis = static_cast<int64_t>(axis_tensor_data[0]);
     }
@@ -60,7 +60,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     int concat_input_size = count(axis + 1, x_dims.size(), x_dims);
     const int top_concat_axis = out->dims()[axis];
     for (size_t i = 0; i < param.x.size(); ++i) {
-      const T* bottom_data = param.x[i]->data<T>();
+      const T* bottom_data = param.x[i]->template data<T>();
       const int64_t bottom_concat_axis = param.x[i]->dims()[axis];
       for (int n = 0; n < num_concat; ++n) {
         std::memcpy(
lite/kernels/x86/conv_compute.h

@@ -52,7 +52,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto& context = ctx_->As<X86Context>();
     auto& param = *param_.get_mutable<operators::ConvParam>();
     lite::Tensor filter = *param.filter;
-    param.output->mutable_data<T>();
+    param.output->template mutable_data<T>();
     const int batch_size = static_cast<int>(param.x->dims()[0]);

     std::vector<int64_t> filter_shape_vec(filter.dims().Vectorize());
@@ -95,9 +95,9 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto blas =
         paddle::lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
     for (int i = 0; i < batch_size; i++) {
-      lite::Tensor in_batch = param.x->Slice<T>(i, i + 1);
+      lite::Tensor in_batch = param.x->template Slice<T>(i, i + 1);
       in_batch.Resize(input_shape);
-      lite::Tensor out_batch = param.output->Slice<T>(i, i + 1);
+      lite::Tensor out_batch = param.output->template Slice<T>(i, i + 1);
       out_batch.Resize(output_matrix_shape);
       for (int g = 0; g < param.groups; g++) {
         lite::Tensor in_slice =
lite/kernels/x86/dropout_compute.h

@@ -38,10 +38,10 @@ class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   using param_t = operators::DropoutParam;
   void Run() override {
     auto& param = *param_.get_mutable<operators::DropoutParam>();
-    const auto* x_data = param.x->data<T>();
-    auto* out_data = param.output->mutable_data<T>();
+    const auto* x_data = param.x->template data<T>();
+    auto* out_data = param.output->template mutable_data<T>();
     if (!param.is_test) {
-      auto* mask_data = param.mask->mutable_data<T>();
+      auto* mask_data = param.mask->template mutable_data<T>();
       std::random_device rnd;
       std::minstd_rand engine;
       int seed = param.fix_seed ? param.seed : rnd();
lite/kernels/x86/elementwise_op_function.h

@@ -248,8 +248,8 @@ class TransformFunctor {
                    lite::Tensor *z,
                    const lite::Context<Target> &ctx,
                    Functor func)
-      : x_(x->data<T>()),
-        y_(y->data<T>()),
+      : x_(x->template data<T>()),
+        y_(y->template data<T>()),
         z_(z->mutable_data<OutType>()),
         nx_(x->numel()),
         ctx_(ctx),
@@ -483,9 +483,10 @@ void FusedElemwiseAndActComputeNoBroadcast(const lite::Context<Target> &ctx,
        x.data<T>(),
        y.data<T>(),
        compound_functor,
-       out->mutable_data<T>(),
-       intermediate_out == nullptr ? nullptr
-                                   : intermediate_out->mutable_data<T>()});
+       out->template mutable_data<T>(),
+       intermediate_out == nullptr
+           ? nullptr
+           : intermediate_out->template mutable_data<T>()});
 }

 template <lite::TargetType Target,
@@ -523,9 +524,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
         compound_functor,
         h,
         w,
-        out->mutable_data<T>(),
-        intermediate_out == nullptr ? nullptr
-                                    : intermediate_out->mutable_data<T>());
+        out->template mutable_data<T>(),
+        intermediate_out == nullptr
+            ? nullptr
+            : intermediate_out->template mutable_data<T>());
   } else {
     FusedElemwiseAndActBroadcast2CPU<T,
@@ -539,9 +541,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
         n,
         post,
         compound_functor,
-        out->mutable_data<T>(),
-        intermediate_out == nullptr ? nullptr
-                                    : intermediate_out->mutable_data<T>());
+        out->template mutable_data<T>(),
+        intermediate_out == nullptr
+            ? nullptr
+            : intermediate_out->template mutable_data<T>());
   }
 }
lite/kernels/x86/fc_compute.h

@@ -140,9 +140,9 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     int M = output->dims().production() / w_dims1;

-    const T* input_data = input->data<T>();
-    const T* w_data = w->data<T>();
-    T* output_data = output->mutable_data<T>();
+    const T* input_data = input->template data<T>();
+    const T* w_data = w->template data<T>();
+    T* output_data = output->template mutable_data<T>();

     auto& context = ctx_->As<X86Context>();
     FCFunctor<lite::TargetType::kX86, T> fc;
@@ -153,7 +153,7 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
        input_data,
        w_data,
        output_data,
-       bias ? bias->data<T>() : NULL,
+       bias ? bias->template data<T>() : NULL,
        with_relu,
        padding_weights);
   }
lite/kernels/x86/fill_constant_batch_size_like_compute.h

@@ -42,9 +42,9 @@ class FillConstantBatchSizeLikeCompute
       int output_dim_idx = param.output_dim_idx;
       odims[output_dim_idx] =
           static_cast<int>(in->lod().back().size()) - 1;
       out->Resize(odims);
-      // out->mutable_data<T>();
+      // out->template mutable_data<T>();
     }
-    out->mutable_data<T>();
+    out->template mutable_data<T>();

     auto value = param.value;
     paddle::lite::x86::math::SetConstant<lite::TargetType::kX86, T> setter;
lite/kernels/x86/gather_compute.h

@@ -50,9 +50,9 @@ void CPUGather(const lite::Tensor* src,
   auto src_dims = src->dims();

-  const T* p_src = src->data<T>();
+  const T* p_src = src->template data<T>();
   const IndexT* p_index = index->data<IndexT>();
-  T* p_output = output->mutable_data<T>();
+  T* p_output = output->template mutable_data<T>();

   // slice size
   int slice_size = 1;
@@ -77,7 +77,7 @@ class GatherCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto index = param.Index;
     auto out = param.Out;

-    out->mutable_data<T>();
+    out->template mutable_data<T>();
     if (x->dims().production() == 0) return;
     /*
      * Since there's no type defined for lite::Tensor in Paddle-Lite, then
lite/kernels/x86/gru_compute.h

@@ -44,7 +44,7 @@ inline void ReorderInitState(const lite::Context<TARGET(kX86)>& context,
                              bool indexed_src) {
   lite::x86::math::CopyMatrixRowsFunctor<TARGET(kX86), T> row_shuffle;
   dst->Resize(src.dims());
-  dst->mutable_data<T>();
+  dst->template mutable_data<T>();
   row_shuffle(context, src, index_lod, dst, indexed_src);
 }
@@ -65,18 +65,19 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto* input = param.input;
     auto* h0 = param.h0;
     auto* weight = param.weight;
-    const T* weight_data = weight->data<T>();
+    const T* weight_data = weight->template data<T>();
     auto* bias = param.bias;

     auto* batch_gate = param.batch_gate;
     auto* batch_reset_hidden_prev = param.batch_reset_hidden_prev;
     auto* batch_hidden = param.batch_hidden;
-    T* batch_gate_ptr = batch_gate->mutable_data<T>();
-    T* batch_reset_hidden_prev_ptr = batch_reset_hidden_prev->mutable_data<T>();
-    T* batch_hidden_ptr = batch_hidden->mutable_data<T>();
+    T* batch_gate_ptr = batch_gate->template mutable_data<T>();
+    T* batch_reset_hidden_prev_ptr =
+        batch_reset_hidden_prev->template mutable_data<T>();
+    T* batch_hidden_ptr = batch_hidden->template mutable_data<T>();

     auto* hidden = param.hidden;
-    hidden->mutable_data<T>();
+    hidden->template mutable_data<T>();
     const auto& hidden_dims = hidden->dims();
@@ -99,7 +100,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
       // Since the batch computing for GRU reorders the input sequences
       // according to their length. The initialized cell state also needs
      // to reorder.
-      const std::vector<size_t>& order(batch_gate->lod()[2]);
+      const std::vector<uint64_t>& order(batch_gate->lod()[2]);
       ReorderInitState<T>(context, *h0, order, &ordered_h0, true);
       gru_value.prev_out_value = ordered_h0.mutable_data<T>();
     } else {
lite/kernels/x86/layer_norm_compute.h

@@ -47,9 +47,9 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto x_dims = x->dims();

-    y->mutable_data<T>();
-    Mean->mutable_data<T>();
-    Var->mutable_data<T>();
+    y->template mutable_data<T>();
+    Mean->template mutable_data<T>();
+    Var->template mutable_data<T>();

     auto matrix_dim = x_dims.Flatten2D(begin_norm_axis);
     int left = static_cast<int>(matrix_dim[0]);
@@ -73,10 +73,10 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
             .At(right);
     ker(in.mutable_data<T>(),
         out.mutable_data<T>(),
-        Mean->mutable_data<T>(),
-        Var->mutable_data<T>(),
-        Scale->data<T>(),
-        Bias->data<T>(),
+        Mean->template mutable_data<T>(),
+        Var->template mutable_data<T>(),
+        Scale->template data<T>(),
+        Bias->template data<T>(),
         static_cast<int>(left),
         epsilon,
         right);
lite/kernels/x86/lookup_table_compute.h

@@ -33,15 +33,15 @@ class LookupTableCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto* ids_t = param.Ids;
     auto* output_t = param.Out;
     int64_t padding_idx = param.padding_idx;
-    const int64_t* ids = ids_t->data<int64_t>();
+    const int64_t* ids = ids_t->template data<int64_t>();
     int64_t ids_numel = ids_t->dims().production();

     auto* table_t = param.W;
     int64_t row_number = table_t->dims()[0];
     int64_t row_width = table_t->dims()[1];
-    const T* table = table_t->data<T>();
-    T* output = output_t->mutable_data<T>();
+    const T* table = table_t->template data<T>();
+    T* output = output_t->template mutable_data<T>();
     memset(output, 0, output_t->dims().production() * sizeof(T));
     for (int64_t i = 0; i < ids_numel; ++i) {
       if (padding_idx != -1 && ids[i] == padding_idx) {
lite/kernels/x86/match_matrix_tensor_compute.cc

@@ -35,7 +35,7 @@ void MatchMatrixTensorCompute<T>::Run() {
   const auto& offset_l = x->lod()[0];
   const auto& offset_r = y->lod()[0];

-  std::vector<size_t> top_offset;
+  std::vector<uint64_t> top_offset;
   int top_size = 0;
   top_offset.push_back(top_size);
   for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
@@ -97,9 +97,9 @@ void MatchMatrixTensorCompute<T>::Run() {
   int batch_size = x->lod()[0].size() - 1;
   int lod_lv1_size = batch_size * dim_t;
   int lod_lv2_size = x->lod()[0].back() * dim_t;
-  std::vector<size_t> out_lod0(batch_size + 1, 0);
-  std::vector<size_t> out_lod1(lod_lv1_size + 1, 0);
-  std::vector<size_t> out_lod2(lod_lv2_size + 1, 0);
+  std::vector<uint64_t> out_lod0(batch_size + 1, 0);
+  std::vector<uint64_t> out_lod1(lod_lv1_size + 1, 0);
+  std::vector<uint64_t> out_lod2(lod_lv2_size + 1, 0);
   for (int i = 0; i < batch_size; i++) {
     out_lod0[i + 1] = out_lod0[i] + dim_t;
     int len_l = offset_l[i + 1] - offset_l[i];
lite/kernels/x86/matmul_compute.h

@@ -56,7 +56,7 @@ class MatMulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto* x = param.X;
     auto* y = param.Y;
     auto* out = param.Out;
-    out->mutable_data<T>();
+    out->template mutable_data<T>();

     auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
     auto mat_dim_a = lite::x86::math::CreateMatrixDescriptor(
lite/kernels/x86/mul_compute.h

@@ -64,7 +64,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
       y_matrix = *y;
     }
-    z->mutable_data<T>();
+    z->template mutable_data<T>();
     auto z_dim = z->dims();
     if (z_dim.size() != 2) {
       z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
lite/kernels/x86/reduce_compute.h
...
@@ -49,7 +49,7 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     bool reduce_all = param.reduce_all;
     auto* input = param.x;
     auto* output = param.output;
-    param.output->mutable_data<T>();
+    param.output->template mutable_data<T>();
     const auto& dims = param.dim;
     bool keep_dim = param.keep_dim;
...
lite/kernels/x86/scale_compute.h
...
@@ -41,8 +41,8 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   void Run() override {
     auto& param = *param_.get_mutable<param_t>();
-    scale_compute(param.x->data<T>(),
-                  param.output->mutable_data<T>(),
+    scale_compute(param.x->template data<T>(),
+                  param.output->template mutable_data<T>(),
                   param.x->dims().production(),
                   param.scale,
                   param.bias,
...
lite/kernels/x86/search_grnn_compute.cc
...
@@ -84,7 +84,7 @@ void SearchGrnnCompute<T>::PrepareLayout(const Tensor* input_blob) {
   int max_width = width_data[idx_sorted_by_width_data[0]];
   // start of reorganizing the input
-  std::vector<size_t> new_offset;
+  std::vector<uint64_t> new_offset;
   new_offset.resize(max_width + 1);
   new_offset[0] = 0;
...
lite/kernels/x86/search_group_padding_compute.h
...
@@ -50,7 +50,7 @@ class SearchGroupPaddingCompute
       }
     }
-    std::vector<size_t> new_offset;
+    std::vector<uint64_t> new_offset;
     new_offset.resize(batch + 1);
     for (int i = 0; i < batch + 1; ++i) {
       new_offset[i] = i * max_seq;
...
@@ -67,7 +67,7 @@ class SearchGroupPaddingCompute
     top1_lod.push_back(offset);
     top1->set_lod(top1_lod);
     top1->Resize({dim0, 1});
-    memset(top1->mutable_data<T>(),
+    memset(top1->template mutable_data<T>(),
            0,
            top1->dims()[0] * top1->dims()[1] * sizeof(T));
     // for padding input id
...
@@ -76,9 +76,9 @@ class SearchGroupPaddingCompute
     top2->set_lod(top2_lod);
     top2->Resize({batch * max_seq, 1});
     // copy data
-    const auto* bottom_data = bottom0->data<T>();
-    auto* top_data = top0->mutable_data<T>();
-    auto* top_padding_input_data = top2->mutable_data<T>();
+    const auto* bottom_data = bottom0->template data<T>();
+    auto* top_data = top0->template mutable_data<T>();
+    auto* top_padding_input_data = top2->template mutable_data<T>();
     for (int i = 0; i < batch; i++) {
       const int copy_step = offset[i + 1] - offset[i];
       const int start = i * max_seq;
...
lite/kernels/x86/search_seq_fc_compute.h
...
@@ -58,8 +58,10 @@ class SearchSeqFcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     int M = x_dims[0];
     int N = w_dims[0];
     for (int i = 0; i < M; i++) {
-      blas.AXPY(
-          N, static_cast<T>(1), b->data<T>(), out->mutable_data<T>() + i * N);
+      blas.AXPY(N,
+                static_cast<T>(1),
+                b->template data<T>(),
+                out->template mutable_data<T>() + i * N);
     }
   }
 }
...
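For context on the call being reformatted above: BLAS AXPY computes y := alpha * x + y over N elements, so the loop adds the bias vector `b` to each of the M output rows with alpha = 1. A scalar equivalent of that step:

    // Scalar equivalent of the AXPY loop: add bias b (length N) to each
    // of the M rows of out, with alpha = 1.
    void AddBiasRows(int M, int N, const float* b, float* out) {
      for (int i = 0; i < M; i++) {
        for (int j = 0; j < N; j++) {
          out[i * N + j] += b[j];  // AXPY with alpha = 1
        }
      }
    }

    int main() {
      float b[2] = {0.5f, -0.5f};
      float out[4] = {1, 2, 3, 4};  // M = 2 rows of width N = 2
      AddBiasRows(2, 2, b, out);
      return 0;
    }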
lite/kernels/x86/sequence_arithmetic_compute.h
...
@@ -39,9 +39,9 @@ class SequenceArithmeticCompute
     out->Resize(x->dims());
     out->set_lod(x->lod());
-    auto x_data = x->data<T>();
-    auto y_data = y->data<T>();
-    auto out_data = out->mutable_data<T>();
+    auto x_data = x->template data<T>();
+    auto y_data = y->template data<T>();
+    auto out_data = out->template mutable_data<T>();
     auto x_seq_offset = x->lod()[0];
     auto y_seq_offset = y->lod()[0];
     int seq_num = x_seq_offset.size() - 1;
...
lite/kernels/x86/sequence_concat_compute.h
...
@@ -25,7 +25,7 @@ namespace x86 {
 template <typename T>
 inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
                      std::vector<lite::Tensor>* xs_in_order) {
-  std::vector<size_t> result;
+  std::vector<uint64_t> result;
   result.resize(xs[0]->lod()[0].size());
   for (size_t i = 1; i < result.size(); ++i) {
...
@@ -75,7 +75,7 @@ class SequenceConcatCompute
     out_dims[0] = batch_size;
     param.Out->Resize(out_dims);
-    T* dout = param.Out->mutable_data<T>();
+    T* dout = param.Out->template mutable_data<T>();
     std::vector<lite::Tensor> x_in_order;
     param.Out->set_lod(ConcatLoD<T>(param.X, &x_in_order));
...
lite/kernels/x86/sequence_concat_compute_test.cc
...
@@ -26,7 +26,7 @@ namespace x86 {
 namespace {
 inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
                      std::vector<lite::Tensor>* xs_in_order) {
-  std::vector<size_t> result;
+  std::vector<uint64_t> result;
   result.resize(xs[0]->lod()[0].size());
   for (size_t i = 1; i < result.size(); ++i) {
...
lite/kernels/x86/sequence_expand_as_compute.h
...
@@ -29,9 +29,10 @@ using Tensor = lite::Tensor;
 template <typename T>
 struct SequenceExpandFunctor {
-  void operator()(const Tensor& x,
-                  const std::vector<size_t>& ref_lod, /*expand referenced lod*/
-                  Tensor* out) {
+  void operator()(
+      const Tensor& x,
+      const std::vector<uint64_t>& ref_lod, /*expand referenced lod*/
+      Tensor* out) {
     int64_t hight = x.dims()[0];
     int64_t width = x.data_size() / hight;
...
@@ -39,13 +40,13 @@ struct SequenceExpandFunctor {
     T* out_data = out->mutable_data<T, T>();
     for (int h_id = 0; h_id < hight; ++h_id) {
-      size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
+      uint64_t span = ref_lod[h_id + 1] - ref_lod[h_id];
       if (span == 0) continue;
       const T* src = in_data + h_id * width;
-      for (int64_t w_id = 0; w_id < width; ++w_id) {
+      for (uint64_t w_id = 0; w_id < width; ++w_id) {
         T ele = src[w_id];
         size_t offset = ref_lod[h_id] * width;
-        for (size_t k = 0; k < span; ++k) {
+        for (uint64_t k = 0; k < span; ++k) {
           out_data[offset + k * width + w_id] = ele;
         }
       }
...
@@ -68,7 +69,7 @@ class SequenceExpandAsCompute
     CHECK_EQ(y_lod.size(), 1);
     CHECK_GT(y_lod[0].size(), 1);
-    out->mutable_data<T, T>();
+    out->template mutable_data<T, T>();
     SequenceExpandFunctor<T> seq_espand_functor;
     seq_espand_functor(*x, y_lod[0], out);
...
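Independent of the integer-width changes above, SequenceExpandFunctor repeats row h of `x` exactly `ref_lod[h + 1] - ref_lod[h]` times in `out`, starting at row `ref_lod[h]`. A compact reference restatement using std::vector:

    // Reference version of the expansion the functor performs, with
    // std::vector in place of lite::Tensor.
    #include <cstdint>
    #include <vector>

    void ExpandRef(const std::vector<float>& x, int64_t width,
                   const std::vector<uint64_t>& ref_lod,
                   std::vector<float>* out) {
      const int64_t height = static_cast<int64_t>(x.size()) / width;
      out->assign(ref_lod.back() * width, 0.f);
      for (int64_t h = 0; h < height; ++h) {
        for (uint64_t k = ref_lod[h]; k < ref_lod[h + 1]; ++k) {
          for (int64_t w = 0; w < width; ++w) {
            (*out)[k * width + w] = x[h * width + w];
          }
        }
      }
    }

    int main() {
      std::vector<float> x = {1, 2, 3, 4};        // two rows of width 2
      std::vector<uint64_t> ref_lod = {0, 2, 3};  // row 0 twice, row 1 once
      std::vector<float> out;
      ExpandRef(x, 2, ref_lod, &out);             // out: 1 2 1 2 3 4
      return 0;
    }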
lite/kernels/x86/sequence_pool_compute.h
...
@@ -40,7 +40,7 @@ class SequencePoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     dims[0] = lod[0].size() - 1;
     out->Resize({dims});
-    out->mutable_data<T>();
+    out->template mutable_data<T>();
     lite::Tensor* index = nullptr;
     const bool is_test = true;
...
lite/kernels/x86/sequence_reshape_compute.h
...
@@ -64,9 +64,9 @@ class SequenceReshapeCompute
     out->Resize(std::vector<int64_t>{
         static_cast<int64_t>(out->lod()[0].back()), out_width});
-    auto* dst_ptr = out->mutable_data<T>();
+    auto* dst_ptr = out->template mutable_data<T>();
     auto size = in->numel() * sizeof(T);
-    std::memcpy(dst_ptr, in->data<T>(), size);
+    std::memcpy(dst_ptr, in->template data<T>(), size);
   }
   virtual ~SequenceReshapeCompute() = default;
...
lite/kernels/x86/shape_compute.h
...
@@ -29,7 +29,7 @@ class ShapeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   void Run() override {
     auto& param = *param_.get_mutable<operators::ShapeParam>();
     // auto& context = context_->As<X86Context>();
-    auto out_data = param.Out->mutable_data<int32_t>();
+    auto out_data = param.Out->template mutable_data<int32_t>();
     auto in_dims = param.X->dims();
     for (int i = 0; i < in_dims.size(); ++i) {
       out_data[i] = in_dims[i];
...
lite/kernels/x86/softmax_compute.h
...
@@ -58,7 +58,7 @@ class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto* x = param.x;
     auto* output = param.output;
-    output->mutable_data<T>();
+    output->template mutable_data<T>();
     const int rank = x->dims().size();
     const int axis = CanonicalAxis(param.axis, rank);
...
lite/kernels/x86/squeeze_compute.h
...
@@ -35,8 +35,8 @@ class SqueezeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto x = param.X;
     auto output = param.Out;
     auto x_dims = x->dims();
-    auto* x_data = x->data<T>();
-    auto* out_data = output->mutable_data<T>();
+    auto* x_data = x->template data<T>();
+    auto* out_data = output->template mutable_data<T>();
     memcpy(out_data, x_data, x_dims.production() * sizeof(T));
   }
...
@@ -54,9 +54,9 @@ class Squeeze2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto output = param.Out;
     auto xshape = param.XShape;
     auto x_dims = x->dims();
-    auto* x_data = x->data<T>();
-    auto* out_data = output->mutable_data<T>();
-    auto* xshape_data = xshape->mutable_data<T>();
+    auto* x_data = x->template data<T>();
+    auto* out_data = output->template mutable_data<T>();
+    auto* xshape_data = xshape->template mutable_data<T>();
     memcpy(out_data, x_data, x_dims.production() * sizeof(T));
     memcpy(xshape_data, x_data, x_dims.production() * sizeof(T));
   }
...
lite/kernels/x86/stack_compute.h
...
@@ -40,9 +40,9 @@ class StackCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     if (axis < 0) axis += (x[0]->dims().size() + 1);
     int n = static_cast<int>(x.size());
-    auto y_data = y->mutable_data<T>();
+    auto y_data = y->template mutable_data<T>();
     std::vector<const T*> x_datas(n);
-    for (int i = 0; i < n; ++i) x_datas[i] = x[i]->data<T>();
+    for (int i = 0; i < n; ++i) x_datas[i] = x[i]->template data<T>();
     int pre = 1, post = 1;
     auto dim = x[0]->dims();
...
lite/kernels/x86/transpose_compute.h
...
@@ -73,7 +73,7 @@ class TransposeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto& param = *param_.get_mutable<param_t>();
     auto* x = param.x;
     auto* out = param.output;
-    out->mutable_data<T>();
+    out->template mutable_data<T>();
     int ndims = param.axis.size();
     auto& context = ctx_->As<X86Context>();
     TransCompute<lite::TargetType::kX86, T>(
...
@@ -92,7 +92,7 @@ class Transpose2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto& param = *param_.get_mutable<param_t>();
     auto* x = param.x;
     auto* out = param.output;
-    out->mutable_data<T>();
+    out->template mutable_data<T>();
     int ndims = param.axis.size();
     auto& context = ctx_->As<X86Context>();
     TransCompute<lite::TargetType::kX86, T>(
...
lite/kernels/x86/uniform_random_compute.cc
...
@@ -34,8 +34,8 @@ class UniformRandomCompute
     auto *param_out = &param.Out->raw_tensor();
-    T *data =
-        param_out->mutable_data<T>(context.x86_device_context()->GetPlace());
+    T *data = param_out->template mutable_data<T>(
+        context.x86_device_context()->GetPlace());
     unsigned int seed = static_cast<unsigned int>(param.seed);
     std::minstd_rand engine;
...
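The lines around this hunk seed a `std::minstd_rand` engine from the op's seed attribute and then fill the output buffer. A minimal sketch of that setup; the uniform bounds below are illustrative assumptions, not values from this diff:

    // Hedged sketch of the generator setup this kernel uses: minstd_rand
    // seeded from the op attribute, feeding a uniform real distribution.
    // The -1/1 bounds are assumptions standing in for the op's min/max.
    #include <random>

    int main() {
      unsigned int seed = 1u;  // static_cast<unsigned int>(param.seed)
      std::minstd_rand engine;
      engine.seed(seed);
      std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
      float buf[8];
      for (float& v : buf) v = dist(engine);  // fill the output buffer
      return 0;
    }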
lite/kernels/x86/var_conv_2d_compute.h
...
@@ -80,7 +80,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     std::vector<int64_t> col_dims_vec{top_size};
     col_dims_vec.push_back(1);
     col->Resize(col_dims_vec);
-    auto* top_data = col->mutable_data<T>();
+    auto* top_data = col->template mutable_data<T>();
     const auto* bottom_data = input.data<T>();
     int kernel_win_size = kernel_h * kernel_w;
...
@@ -149,7 +149,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     // const auto& offset_y = in_row->lod()[0];
     const auto& offset_y = param.X->lod()[1];
     const auto& offset_x = param.X->lod()[2];
-    std::vector<size_t> top_offset;
+    std::vector<uint64_t> top_offset;
     int top_size = 0;
     top_offset.push_back(top_size);
     for (int b = 0; b < batch; ++b) {
...
@@ -178,9 +178,9 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     std::vector<int64_t> top_dims_vec{top_size};
     top_dims_vec.push_back(1);
     top->Resize(top_dims_vec);
-    auto* top_data = top->mutable_data<T>();
-    const auto* w_data = w->data<T>();
-    const auto* col_data = col->data<T>();
+    auto* top_data = top->template mutable_data<T>();
+    const auto* w_data = w->template data<T>();
+    const auto* col_data = col->template data<T>();
     auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
     for (int b = 0; b < batch; ++b) {
...
lite/kernels/x86/var_conv_2d_compute_test.cc
...
@@ -140,7 +140,7 @@ static void var_conv_2d_ref(const lite::Tensor* bottom,
   const auto& col_offset = col->lod()[0];
   const auto& offset_x = in_col->lod()[0];
   const auto& offset_y = in_row->lod()[0];
-  std::vector<size_t> top_offset;
+  std::vector<uint64_t> top_offset;
   int top_size = 0;
   top_offset.push_back(top_size);
   for (int b = 0; b < batch; ++b) {
...